p4-watchreplica #1

  • //
  • guest/
  • vk_thandesh/
  • bin/
  • p4-watchreplica
  • View
  • Commits
  • Open Download .zip Download (7 KB)
#!/usr/bin/perl

=head1 NAME

p4-watchreplica - Check replication health condition

=head1 SYNOPSIS

p4-watchreplica {-m/aster <master address> -r/eplica <replica address>}

=head1 DESCRIPTION

     Check replication health condition

     The script does a couple of useful things:
     It checks that a master/replica relation exists between the two servers.
     It allows a configurable lag (see $ALLOWED_SEEK_LAG in the script) while the replica catches up on the journal.

     Ideally you should set this script up as a cron job and let cron mail you the output.

=head1 OPTIONS

    -help           - Output help information.
    -m/aster        - Perforce master host address
    -r/eplica       - Perforce replica host address

=head1 EXAMPLES

p4-watchreplica -m master:2666 -r replica:2777

=head1 SEE ALSO

p4 help pull

=head1 CONTACT

Thandesha V K - [email protected]

=cut

use warnings;
use strict;

use Getopt::Long   qw( GetOptions );
use Pod::Usage     qw( pod2usage );

# Path to the p4 command-line client that both subroutines run.
my $P4               = "/sbin/p4";
# Largest gap between the master and replica journal Sequence values that
# check_replication still reports as a WARNING; a bigger gap is an ERROR.
my $ALLOWED_SEEK_LAG = 5000;

################################################################################
# NAME :
#    validate_input
#
# PURPOSE :
#    Process and validate the options specified on the command line by the user.
#    Both addresses must answer "p4 info", and the replica's "p4 configure show"
#    output must contain a P4TARGET line that names the master.
#
# PARAMETERS :
#    NONE (options are read from @ARGV through GetOptions)
#
# RETURNS :
#    Reference to a hash of the parsed options (keys: master, replica).
#    On any failure it prints usage or an ERROR line and exits instead.
#
# GLOBALS :
#    $P4            - Path to p4 command
#
# COMMENTS:
#    p4 is started with list-form pipe opens, so the user supplied addresses
#    never pass through a shell. The master address is also quoted before it
#    is used inside the P4TARGET pattern, so "." in a host name is literal.
#
################################################################################
sub validate_input
{
    my %options;

    GetOptions( \%options, 'help', 'master|m=s', 'replica|r=s' )
    or pod2usage( "ERROR: Error in usage." );

    if ( $options{ help } ) {
        pod2usage(  -exitval => 0, -verbose => 1 );
    }

    # Run "$P4 -p <port> <args...>" without a shell. Returns a reference to the
    # output lines, or nothing when p4 could not be started or exited non-zero
    # (close() on a pipe is false in that case).
    my $p4_lines = sub {
        my ( $port, @args ) = @_;
        open( my $pipe, '-|', $P4, '-p', $port, @args ) or return;
        my @lines = <$pipe>;
        close( $pipe ) or return;
        return \@lines;
    };

    if ( !$options{master} ) {
        pod2usage( -message =>"ERROR: Please pass Perforce master host address", -exitval => 1, -verbose => 1 );
    }
    if ( !$p4_lines->( $options{master}, 'info' ) ) {
        print "ERROR: $options{master} doesn't seem to be a valid perforce server!!";
        exit (1);
    }

    if ( !$options{replica} ) {
        pod2usage( -message =>"ERROR: Please pass Perforce replica host address", -exitval => 1, -verbose => 1 );
    }
    if ( !$p4_lines->( $options{replica}, 'info' ) ) {
        print "ERROR: $options{replica} doesn't seem to be a valid perforce server!!";
        exit (1);
    }

    my $configure = $p4_lines->( $options{replica}, 'configure', 'show' );
    if ( !$configure ) {
        print "ERROR: Trouble running \"p4 configure show\" for $options{replica}!!";
        exit (1);
    }

    # Expected line: P4TARGET=master:2666 (configure)
    # \Q...\E keeps regex metacharacters in the address from acting as pattern.
    my $target_line = qr/^P4TARGET=\Q$options{master}\E \(configure\)$/;
    my $found       = grep { $_ =~ $target_line } @{$configure};
    if ( !$found ) {
        print "ERROR: Can't find master->replica relation between $options{master} and $options{replica}!!";
        exit (1);
    }

    return \%options;
}

################################################################################
# NAME :
#    check_replication
#
# PURPOSE :
#    Compare journal count as well as seek position with some configured seek delay
#
# PARAMETERS :
#    $master        - Master perforce server
#    $replica       - Replica perforce server
#
# RETURNS :
#    0 when the replica journal and sequence match the master exactly.
#    Every other outcome (p4 failure, unreadable status, journal mismatch,
#    any sequence lag) prints a report and exits with status 1.
#
# GLOBALS :
#    $P4                 - Path to p4 command
#    $ALLOWED_SEEK_LAG   - Configurable seek lag limit
#
# COMMENTS:
#    p4 is started with list-form pipe opens, so the server addresses never
#    pass through a shell.
#
################################################################################
sub check_replication
{
    my ( $master, $replica ) = @_;

    # Run "$P4 -p <port> <args...>" without a shell. Returns a reference to the
    # output lines, or nothing when p4 could not be started or exited non-zero
    # (close() on a pipe is false in that case).
    my $p4_lines = sub {
        my ( $port, @args ) = @_;
        open( my $pipe, '-|', $P4, '-p', $port, @args ) or return;
        my @lines = <$pipe>;
        close( $pipe ) or return;
        return \@lines;
    };

    my $status = $p4_lines->( $replica, 'pull', '-l', '-j' );
    if ( !$status ) {
        print "ERROR: Trouble checking \"Replica status\" for $replica!!";
        exit (1);
    }

    my ( $journal_rep, $seek_rep, $journal_master, $seek_master );
    foreach my $line ( @{$status} ) {
        # The trailing period is matched literally and is optional, so a line
        # without it cannot lose the last digit of the sequence number.
        #Current replica journal state is:       Journal 5034,   Sequence 144466684940.
        if ( $line =~ m/^Current replica journal state is:\s+Journal (\d+),\s+Sequence (\d+)\.?\s*$/ ) {
            ( $journal_rep, $seek_rep ) = ( $1, $2 );
        }
        #Current master journal state is:        Journal 5034,   Sequence 144466684940.
        elsif ( $line =~ m/^Current master journal state is:\s+Journal (\d+),\s+Sequence (\d+)\.?\s*$/ ) {
            ( $journal_master, $seek_master ) = ( $1, $2 );
        }
    }

    # Without this guard two undefined journals would compare as equal and the
    # check would carry on with no data at all.
    if ( grep { !defined $_ } $journal_rep, $seek_rep, $journal_master, $seek_master ) {
        print "ERROR: Could not read replica and master journal state from \"p4 pull -l -j\" on $replica!!";
        exit (1);
    }

    if ( $journal_rep != $journal_master ) {
        print "ERROR: MASTER ($master) and REPLICA ($replica) journal counters are not matching each other!!";
        exit (1);
    }

    # Current journal counter as reported by the master itself.
    my $counter = $p4_lines->( $master, 'counter', 'journal' );
    my $journal = $counter ? join( '', @{$counter} ) : '';
    $journal =~ s/\s+\z//;    # drop the newline so the reports print cleanly
    if ( $journal !~ m/\A\d+\z/ ) {
        print "ERROR: Trouble checking \"Journal value\" for $master!!";
        exit (1);
    }

    if ( $journal_rep != $journal ) {
print<<EOF;
ERROR: MASTER ($master) and REPLICA ($replica) journal counters are matching but they are not same as current journal value
       Check if
       1. Just now journal got rotated and replication is yet to catch up
       2. Replication has stopped 
       3. Replication has some problem and failing

       MASTER ($master) journal             = $journal_master
       REPLICA ($replica) journal           = $journal_rep
=========================================================
       Current journal counter from master  = $journal
EOF
        exit (1);
    }

    my $diff = $seek_master - $seek_rep;
    if ( $seek_rep == $seek_master ) {
print<<EOF;
INFO: Replication is going just fine. Relax :)
EOF
        return 0;
    }

    if ( $diff <= $ALLOWED_SEEK_LAG ) {
print<<EOF;
WARNING: Replica is in good health but busy replicating and lagging quite a bit from master server.
         MASTER ($master) journal                      = $journal_master
         REPLICA ($replica) journal                    = $journal_rep

         MASTER ($master) journal SEEK position        = $seek_master
         REPLICA ($replica) journal SEEK position      = $seek_rep
=====================================================================
         REPLICA is lagging in SEEK position by        = $diff 
EOF
        exit (1);
    }

print<<EOF;
ERROR: Replication is lagging behind more than threshold. Please Check
         MASTER ($master) journal                      = $journal_master
         REPLICA ($replica) journal                    = $journal_rep
         Current journal counter from master           = $journal

         MASTER ($master) journal SEEK position        = $seek_master
         REPLICA ($replica) journal SEEK position      = $seek_rep

         Configurable allowed SEEK lag                 = $ALLOWED_SEEK_LAG
         REPLICA is lagging in SEEK position by        = $diff
EOF
    # Lag beyond the threshold is a failure, so it must not exit with status 0.
    exit (1);
}

################################################################################
###############################   main   #######################################
################################################################################
# Output record separator: every print() appends a newline.
$\ = "\n";

# Validate the command line, then compare the replica against the master.
my $opts = validate_input();
check_replication( @{$opts}{qw( master replica )} );

# Change User Description Committed
#1 8262 VK Thandesh Check replication health condition