#!/usr/local/bin/perl
# -*-Fundamental-*-

#  NEITHER THE AUTHOR, NETWORK APPLIANCE, INC. NOR PERFORCE SOFTWARE
#  MAKE ANY WARRANTY, EXPLICIT OR IMPLIED, AS TO THE CORRECTNESS,
#  FITNESS FOR ANY APPLICATION, NOR THE SAFETY OF THE snap_checkpoint SOFTWARE. 
#

# $Id: //guest/stephen_vance/utils/snap_checkpoint/snap_checkpoint#2 $
#
# Original Author: Richard Geiger, Network Appliance, Inc.
#

#  This script is intended both as an illustration, and, potentially,
#  as an actual tool. For general information about the technique
#  implemented herein, see
#
#    http://public.perforce.com/guest/richard_geiger/utils/snap_checkpoint/snap_checkpoint.html
#
#  While this script may work by modifying only the configuration variables below,
#  it is your responsibility to verify that it works in your environment.
#
#  In particular, you should verify that the p4d_lock() function works
#  correctly at your site. You can do this by executing
#  "snap_checkpoint lockcheck" (after having set the configuration
#  variables, below). If things are working right, the script will
#  report that the database has been locked. You can then (from a second
#  login shell) attempt to execute a p4 command ("p4 user -o" is
#  fine); this should block until the lock is released. Here's what
#  you should see:
#
#   rmg $ ./sn lockcheck
#   snap_checkpoint: requesting lock on all db.* tables...
#   snap_checkpoint: /usr/big3_llock/rmg/p4bench_root locked.
#   snap_checkpoint: press return to unlock 
#
#      (At this point all commands to your Perforce server should block,
#       until you press return)
#
#   snap_checkpoint: /usr/big3_llock/rmg/p4bench_root unlocked.
#
#  

use Carp;
use strict;
$| = 1;

# $Myname is the basename of the running script (everything up to and
# including the last "/" of $0 is removed); it prefixes diagnostic
# messages and the usage text.
#
my $Myname;
($Myname = $0) =~ s%^.*/%%;

# Configuration Settings
#
# REVIEW THESE SETTINGS, AND ADJUST THEM AS NECESSARY FOR USE IN YOUR
# ENVIRONMENT. The values below are placeholders; the script refuses
# to run (see the "die" at the end of this section) until that
# statement has been removed.
#
# $P4PORT for the server you wish to checkpoint
# (passed to "p4 -p" and "p4d -p")
#
my $P4PORT	  = "perforce:1666";

# $P4ROOT for the server you wish to checkpoint
# (the directory holding the db.* files that get locked)
#
my $P4ROOT	  = "/usr/p4/root";

# The path to the "p4" client to be used
#
my $P4		  = "/usr/local/bin/p4";

# The path to the "p4d" server to be used
#
my $P4D		  = "/usr/local/bin/p4d";

# The path to the directory where the checkpoint (and the saved
# journal segment) should be written
#
my $P4CHECKPOINT  = "/usr/p4/checkpoints";

# The path to the journal file. The saved copy is taken from the
# ".snapshot/checkpoint" directory beside this file, and the live file
# is truncated once the snapshot has been created.
#
my $P4JOURNAL	  = "/usr2/p4/journal";

# The name of the journal counter to use
# (p4d r00.1 won't let us use "journal" :-()
#
my $P4COUNTER	  = "snap_journal";

# The name of the NetApp filer that holds the volume where
# $P4ROOT is stored (the host that "snap" commands are sent to via $RSH)
#
my $FILER	  = "ourfiler";

# The volume name of the volume where $P4ROOT is stored
#
my $VOLUME	  = "perforce";

# Path to the host's "rsh" command
#
my $RSH		  = "/bin/rsh";

# Path to the host's "gzip" command (used to compress the saved journal)
#
my $GZIP 	  = "/usr/local/bin/gzip";

# Perforce server version; selects which db.* locking order is used.
# Must be one of the versions recognized further down in this script.
#
my $p4dVers	  = "r2001.1";

#  Remove the following statement after configuring the settings above.
#  It is a deliberate safety stop so that an unconfigured copy of the
#  script can never lock or truncate anything.
#
die "You must configure the settings above before using this script!";

#  Select the db.* locking order that matches $p4dVers.
#
#  $dbfiles holds one table file name per line, in the order in which the
#  tables must be locked; @dbfiles (built at the end of this section) is
#  the same list as an array, and is what p4d_lock/p4d_unlock iterate over.
#
#  The dots in the version patterns are escaped so that only the exact
#  release strings match. Table names must not carry stray whitespace: a
#  name such as "db.protect " would fail the "-f" existence test in
#  p4d_lock and the table would silently be left unlocked.
#
my $dbfiles;

if ($p4dVers =~ /^r(99\.1|99\.2|2000\.1)$/)
  {
    #  The locking order of the db.* files, good for
    #
    #   r99.1
    #   r99.2
    #   r2000.1
    #   r2000.2
    #
    #  per information supplied by Perforce Software:
    #
    #    http://www.perforce.com/perforce/doc.991/schema/index.html
    #    http://www.perforce.com/perforce/doc.992/schema/index.html
    #    http://www.perforce.com/perforce/doc.001/schema/index.html
    #    http://www.perforce.com/perforce/doc.002/schema/index.html
    #
    #  NOTE(review): r2000.2 is listed above but is not accepted by the
    #  pattern on this branch; confirm the order against the doc.002
    #  schema before adding it.
    #
    $dbfiles = <<EOL;
db.counters
db.user
db.group
db.depot
db.domain
db.view
db.review
db.have
db.integ
db.locks
db.rev
db.revcx
db.working
db.change
db.desc
db.job
db.jobpend
db.jobdesc
db.fix
db.fixrev
db.boddate
db.bodtext
db.ixdate
db.ixtext
db.protect
db.trigger
EOL
  }
elsif ($p4dVers =~ /^r(2001\.1)$/)
  {
    #  The locking order of the db.* files, good for
    #
    #   r2001.1
    #
    #  per information supplied by Perforce Software:
    #
    #    http://www.perforce.com/perforce/doc.011/schema/index.html
    #
    $dbfiles = <<EOL;
db.counters
db.logger
db.user
db.group
db.depot
db.domain
db.view
db.review
db.have
db.label
db.integ
db.integed
db.resolve
db.locks
db.rev
db.revcx
db.working
db.change
db.desc
db.job
db.jobpend
db.jobdesc
db.fix
db.fixrev
db.boddate
db.bodtext
db.ixdate
db.ixtext
db.protect
db.trigger
EOL

  }
elsif ($p4dVers =~ /^r(2002\.1)$/)
  {
    #  The locking order of the db.* files, good for
    #
    #   r2002.1
    #
    #  per information supplied by Perforce Software:
    #
    #    http://www.perforce.com/perforce/doc.021/schema/index.html
    #
    $dbfiles = <<EOL;
db.counters
db.logger
db.user
db.group
db.depot
db.domain
db.view
db.review
db.have
db.label
db.integ
db.integed
db.resolve
db.locks
db.rev
db.revcx
db.working
db.change
db.changex
db.desc
db.job
db.jobpend
db.jobdesc
db.fix
db.fixrev
db.boddate
db.bodtext
db.ixdate
db.ixtext
db.protect
db.trigger
EOL

  }
elsif ($p4dVers =~ /^r(2002\.2|2003\.1)$/)
  {
    #  The locking order of the db.* files, good for
    #
    #   r2002.2
    #   r2003.1
    #
    #  per information supplied by Perforce Software:
    #
    #    http://www.perforce.com/perforce/doc.022/schema/index.html
    #    http://www.perforce.com/perforce/doc.031/schema/index.html
    #
    $dbfiles = <<EOL;
db.counters
db.logger
db.user
db.group
db.depot
db.domain
db.view
db.review
db.have
db.label
db.integ
db.integed
db.resolve
db.locks
db.rev
db.revcx
db.working
db.change
db.changex
db.desc
db.job
db.jobpend
db.jobdesc
db.fix
db.fixrev
db.boddate
db.bodtext
db.ixdate
db.ixtext
db.protect
db.trigger
db.message
EOL

  }
elsif ($p4dVers =~ /^r(2003\.2|2004\.1)$/)
  {
    #  The locking order of the db.* files, good for
    #
    #   r2003.2
    #   r2004.1
    #
    #  per information supplied by Perforce Software:
    #
    #    http://www.perforce.com/perforce/doc.032/schema/index.html
    #    http://www.perforce.com/perforce/doc.041/schema/index.html
    #
    $dbfiles = <<EOL;
db.counters
db.logger
db.user
db.group
db.depot
db.domain
db.view
db.review
db.have
db.label
db.integ
db.integed
db.resolve
db.locks
db.rev
db.revcx
db.working
db.change
db.changex
db.desc
db.job
db.jobpend
db.jobdesc
db.fix
db.fixrev
db.boddate
db.bodtext
db.ixdate
db.ixtext
db.protect
db.trigger
db.message
db.monitor
EOL

  }
elsif ($p4dVers =~ /^r(2004\.2)$/)
  {
    #  The locking order of the db.* files, good for
    #
    #   r2004.2
    #
    #  per information supplied by Perforce Software:
    #
    #    http://www.perforce.com/perforce/doc.042/schema/index.html
    #
    $dbfiles = <<EOL;
db.counters
db.logger
db.user
db.group
db.depot
db.domain
db.view
db.review
db.have
db.label
db.integ
db.integed
db.resolve
db.locks
db.rev
db.revcx
db.revpx
db.working
db.traits
db.trigger
db.change
db.changex
db.desc
db.job
db.jobpend
db.jobdesc
db.fix
db.fixrev
db.boddate
db.bodtext
db.ixdate
db.ixtext
db.protect
db.message
db.monitor
EOL

  }
else
  {
    print <<EOM;

Unrecognized Perforce server version "$p4dVers". You will need to confirm
the db.* locking order for this version of the Perforce server, and modify
this script to recognize it before proceeding. See, e.g.,

  http://www.perforce.com/perforce/doc.991/schema/index.html
  http://www.perforce.com/perforce/doc.992/schema/index.html
  http://www.perforce.com/perforce/doc.001/schema/index.html
  http://www.perforce.com/perforce/doc.002/schema/index.html
  http://www.perforce.com/perforce/doc.011/schema/index.html
  http://www.perforce.com/perforce/doc.021/schema/index.html

EOM
    exit 1;
  }

#  Turn the selected list into an array, one table file name per element.
#  Surrounding whitespace is stripped and blank lines are dropped, so a
#  stray space in one of the lists above can never produce a file name
#  that fails to match the real table file.
#
my @dbfiles;
foreach my $entry (split(/\n/, $dbfiles))
  {
    $entry =~ s/^\s+//;
    $entry =~ s/\s+$//;
    if ($entry ne "") { push(@dbfiles, $entry); }
  }


#  Return the directory portion of a path, in the manner of dirname(1):
#
#    ""                  -> "."
#    "journal"           -> "."
#    "/journal"          -> "/"
#    "/usr2/p4/journal"  -> "/usr2/p4"
#
#  Trailing and repeated slashes around the last component are tolerated.
#
sub dirname
{
  my ($path) = @_;

  # An empty path means the current directory; then force a trailing
  # slash so that every remaining case can be matched the same way.
  $path =~ s%^$%.%;
  $path .= "/";

  # A single component directly under the root.
  if ($path =~ m%^/[^/]*//*$%)
    {
      return "/";
    }

  # Two or more components: keep everything before the last one.
  if ($path =~ m%^(.*[^/])//*[^/][^/]*//*$%)
    {
      my $parent = $1;
      return $parent;
    }

  # A bare name with no directory part.
  return ".";
}


use Fcntl ':flock'; # import LOCK_* constants

#  Lock the whole Perforce database by taking an exclusive flock() on each
#  db.* file under $P4ROOT, in the order given by @dbfiles.
#
#  Each file is opened on a package filehandle named after the table
#  (db.counters -> COUNTERS, db.have -> HAVE, ...); p4d_unlock derives the
#  same names to close the handles again, which releases the locks.
#
#  Arguments:
#    $sleep - if true, never return: hold the locks and sleep forever.
#
#  Exits with status 1 (after reporting via &msg) if a table file cannot
#  be opened or locked; dies if no table file was found at all.
#
sub p4d_lock
{
  no strict 'refs';
  my($sleep) = @_;

  my $nlocked = 0;

  foreach my $file (@dbfiles)
    {
      my $path = "$P4ROOT/$file";

      # With r01.1 (at least), empty tables don't have files yet. Real
      # servers almost certainly will have all the db.* files, but
      # this is here just in case
      #
      if (! -f $path) { next; }

      my $handle = $file; $handle =~ s/^db\.//; $handle =~ tr/a-z/A-Z/;

      #  Note: Solaris seems to need the "+<" open mode in order to allow
      #  LOCK_EX locks to be placed.
      #
      #  The three-argument form of open keeps the mode separate from the
      #  path, so the path is used exactly as tested by "-f" above.
      #
      if (! open($handle, "+<", $path))
        {
          &msg("$Myname: can't open \"$P4ROOT/$file\": $!\n", undef, undef, "p4");
          exit 1;
        }
      if (! flock($handle, LOCK_EX))
        {
          &msg("$Myname: can't lock \"$P4ROOT/$file\": $!\n", undef, undef, "p4");
          exit 1;
        }
      $nlocked++;
    }

  if ($nlocked <= 0) { die "no tables were locked!"; }

  use strict 'refs';
  &msg("$Myname: $P4ROOT locked.\n");

  # Hold the locks indefinitely when asked to.
  if ($sleep) { while (1) { sleep 60*60*24; } }
}


#  Release the database locks taken by p4d_lock.
#
#  Walks @dbfiles in reverse order, derives the per-table filehandle name
#  the same way p4d_lock does (db.counters -> COUNTERS), and closes it;
#  closing the handle drops the flock() held on it. Handles for tables
#  that were never opened are simply closed without effect.
#
sub p4d_unlock
{
  no strict 'refs';

  my @tables = reverse(@dbfiles);
  while (@tables)
    {
      my $name = shift(@tables);
      $name =~ s/^db\.//;
      $name =~ tr/a-z/A-Z/;
      close($name);
    }

  use strict 'refs';
  &msg("$Myname: $P4ROOT unlocked.\n");
}


#  Run a command, returning status and output; terminate
#  on any error.
#  
#  Run $cmd through a pipe with stderr folded into stdout, echoing the
#  command ("> ...") and each line of its output (": ...") to stdout.
#
#  Arguments:
#    $cmd - the command line to run.
#
#  Returns the list ($sts, $output): $sts is the wait status (always 0
#  when this function returns) and $output is everything the command
#  printed ("" if it printed nothing).
#
#  Dies if the pipe cannot be opened, or if the command terminates with a
#  non-zero wait status; the message reports both the terminating signal
#  and the exit status.
#
sub s
{
  my ($cmd) = @_;
  my $sts;
  my $output = "";

  print("> $cmd\n");

  my $pipe;
  if (! open($pipe, "-|", "$cmd 2>&1"))
    { die "can't open \"$cmd 2>&1 |\": $!"; }

  while (my $line = <$pipe>) { print(": $line"); $output .= $line; }

  # Closing a pipe handle waits for the command and sets $?.
  close $pipe;

  if ($sts = $?)
    {
      # The terminating signal is the low seven bits of the wait status
      # and the exit code is the high byte.
      my $sig = $sts & 0x7f;
      $sts = $sts >> 8;
      die "\"$cmd\" exited with signal $sig status $sts";
    }
  return ($sts, $output);
}

#  Return the current local time as a 14-digit "YYYYMMDDhhmmss" string,
#  used to name checkpoint and journal files.
#
sub ts
{
  # localtime fields: 0=sec 1=min 2=hour 3=mday 4=mon(0-based) 5=year-1900
  my @now = localtime(time);

  my $stamp = sprintf("%04d%02d%02d%02d%02d%02d",
                      $now[5] + 1900, $now[4] + 1, @now[3, 2, 1, 0]);
  return $stamp;
}

#  The following function is taken from the NetApp "p4d_admin" script.
#  I've decided to try and keep the function the same, verbatim, as it
#  is there, so things might be done a bit differently than they
#  otherwise would.  In particular, I've stubbed out the "&msg"
#  function with a dummy version that ignores the later args, which are
#  used in the context of p4d_admin, but not here.
#
#
#  Write a message to STDERR. Only the first argument is used; any
#  further arguments passed by callers are accepted and ignored.
#
sub msg
{
  my $text = shift;
  print STDERR $text;
}

#  Take a checkpoint of the Perforce server using a filer snapshot, so
#  that the server is only locked for as long as it takes to create the
#  snapshot and truncate the journal.
#
#  Steps, in order:
#    1. delete the previous "checkpoint" snapshot on $FILER/$VOLUME;
#    2. read and increment the $P4COUNTER journal sequence counter;
#    3. lock every db.* table (p4d_lock);
#    4. create a new "checkpoint" snapshot;
#    5. truncate $P4JOURNAL and unlock (p4d_unlock);
#    6. copy the journal out of the snapshot into $P4CHECKPOINT and gzip it;
#    7. run "p4d -jd" against the snapshot copy of $P4ROOT.
#
#  Takes no arguments. Returns the status of the final p4d command (0 on
#  success). On any failure it reports via &msg and exits with status 1,
#  or dies from within &s when a command returns a non-zero status;
#  process exit releases any locks still held.
#
sub p4d_snap_checkpoint
{
  # First, delete the previous snapshot (if it exists).  (We keep the
  # old one around, just in case, until we need to make a new one.)
  # The snapshot lives on $VOLUME, the same volume used for "snap create"
  # below.
  #
  my ($sts, $output) = &s("$RSH $FILER snap delete $VOLUME checkpoint 2>&1");
  chomp $output;

  my @output = split(/\n/, $output);
  my $bad = 0;
  foreach my $s (@output)
    { if ($s !~ /^(deleting snapshot\.+|No such snapshot.)$/) { $bad = 1; last; } }

  if ($sts || $bad)
    {
      &msg("$Myname: couldn't delete snapshot.\n", undef, undef, "p4");
      exit 1;
    }

  # Next, look up the journal sequence number counter...  Logically,
  # we'd prefer to do this with the database locked, but the danger of
  # a rogue checkpoint -jc happening seems tolerable...
  #
  ($sts, $output) = &s("$P4 -p $P4PORT counters");

  # \Q...\E keeps the configured counter name from being interpreted as
  # a regular expression.
  my $journal_counter = "";
  foreach my $line (split(/\n/, $output))
    { if ($line =~ /^\Q$P4COUNTER\E = (\d+)/) { $journal_counter = $1; } }

  if ($journal_counter eq "")
    {
      &msg("$Myname: can't get $P4COUNTER counter, nothing done.\n", undef, undef, "p4");
      exit 1;
    }

  # Now increment the counter
  #
  my $new_journal_counter = $journal_counter + 1;

  ($sts, $output) = &s("$P4 -p $P4PORT counter -f $P4COUNTER $new_journal_counter");
  chomp $output;
  if ($output !~ /^Counter \Q$P4COUNTER\E set\.$/)
    {
      &msg("$Myname: couldn't increment $P4COUNTER counter:\n$output\n", undef, undef, "p4");
      exit 1;
    }

  # Next, we lock down the entire database
  #
  &p4d_lock(0);

  my $tstamp = &ts;

  # Now, we snapshot the database filesystem...
  #
  ($sts, $output) = &s("$RSH $FILER snap create $VOLUME checkpoint 2>&1");
  chomp $output;
  if ($sts || $output !~ /^creating snapshot\.+$/)
    {
      &msg("$Myname: couldn't create snapshot.\n", undef, undef, "p4");
      exit 1;
    }

  # OK, now we are confident that we have a good snapshot. We can
  # proceed on the assumption that the "off line" operations of
  # copying the journal and "p4d -jd" will work. All we need to do here
  # is to truncate the journal, and put the server back on-line by
  # unlocking it...

  # Truncate the journal (opening for write empties the file):
  #
  if (! open(J, ">$P4JOURNAL"))
    {
      &msg("$Myname: couldn't truncate \"$P4JOURNAL\": $!\n", undef, undef, "p4");
      exit 1;
    }
  close J;
  &msg("$Myname: \"$P4JOURNAL\" truncated.\n");


  # Now we can release the lock...
  #
  &p4d_unlock;

  # At this point, the system is online for users.

  # Copy the journal (from the snapshot).
  #
  my $journaln = "$P4CHECKPOINT/$tstamp.jnl.$journal_counter";

  my $P4JOURNALDIR = &dirname($P4JOURNAL);
  my $P4JOURNALFILE;
  ($P4JOURNALFILE = $P4JOURNAL) =~ s%^.*/%%;

  ($sts, $output) = &s("/bin/cp -p $P4JOURNALDIR/.snapshot/checkpoint/$P4JOURNALFILE $journaln");

  if ($sts)
    {
      &msg("$Myname: couldn't copy \"$P4JOURNALDIR/.snapshot/checkpoint/$P4JOURNALFILE\".\n",
              undef, undef, "p4");
      exit 1;
    }

  # Compress the saved journal segment...
  # (Ignore errors - they can be dealt with later.)  &s dies when the
  # command fails, so the failure is caught here and only reported; the
  # uncompressed journal copy is still in place and the checkpoint below
  # must still be taken.
  #
  eval { &s("$GZIP $journaln"); };
  if ($@)
    {
      &msg("$Myname: warning: couldn't compress \"$journaln\" (continuing): $@\n");
    }

  # Diddle $P4ROOT so that this checkpoint is done from the snapshot
  # we just took...
  #
  my $P4ROOT_sav = $P4ROOT;
  $P4ROOT .= "/.snapshot/checkpoint"; $ENV{"P4ROOT"} = $P4ROOT;

  # And then run the checkpoint from the snapshot...
  #
  ($sts, $output)
    = &s("$P4D -r $P4ROOT -p $P4PORT -z -jd $P4CHECKPOINT/$tstamp.ckp.$new_journal_counter.gz");

  if ($sts || $output !~ /^Dumping to \Q$P4CHECKPOINT\E\/$tstamp\.ckp\.$new_journal_counter\.gz\.\.\.$/)
    {
      &msg("$Myname: checkpoint failed.\n", undef, undef, "p4");
      exit 1;
    }

  # Mom taught me to put things back where I'd got 'em.
  #
  $P4ROOT = $P4ROOT_sav; $ENV{"P4ROOT"} = $P4ROOT;

  return $sts;
}

#  Command-line dispatch.
#
#    snap_checkpoint            - take a snapshot checkpoint
#    snap_checkpoint lockcheck  - lock the database, wait for the operator
#                                 to press return, then unlock (used to
#                                 verify that locking works at this site)
#
#  Any other argument prints the usage message and exits with status 1.
#
if (@ARGV)
  {
    if ($ARGV[0] eq "lockcheck")
      {
        shift @ARGV;
        &msg("$Myname: requesting lock on all db.* tables...\n");
        &p4d_lock(0);
        &msg("$Myname: press return to unlock ");

        # Read the keypress from the terminal explicitly: the bare <>
        # operator would try to open any remaining command-line
        # arguments as input files.
        my $reply = <STDIN>;

        &p4d_unlock();
        exit 0;
      }
    else
      {
        print STDERR <<EOM;

$Myname: usage $Myname [ lockcheck ]

  *** Be sure you've edited the configuration variables at the top of
  *** this script to correctly refelect your configuration!

EOM
        exit 1;
      }
  }

exit (&p4d_snap_checkpoint());