# backup_functions.sh #1

#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

set -u

# Common functions used in all backup scripts.

#------------------------------------------------------------------------------
# Verify key variables in the shell environment exist, or else abort.
#------------------------------------------------------------------------------
check_vars () {
   # Abort (exit 1) unless every variable the backup scripts rely on is set
   # and non-empty.  Messages go to stdout.
   #
   # Indirect expansion with an empty default (${!name:-}) is used so that an
   # unset variable is reported by the messages below rather than tripping
   # 'set -u' with a bare "unbound variable" error.
   local required_var=
   local missing_vars=

   for required_var in SDP_INSTANCE P4HOME P4PORT P4ROOT P4JOURNAL P4BIN \
      P4DBIN P4TICKETS KEEPCKPS KEEPJNLS KEEPLOGS CHECKPOINTS LOGS OSUSER; do
      if [[ -z "${!required_var:-}" ]]; then
         missing_vars+=" $required_var"
      fi
   done

   if [[ -n "$missing_vars" ]]; then
      echo "Use p4master_run or source p4_vars when calling this script."
      echo "Required external variable not set. Abort!"
      echo "Missing or empty:$missing_vars"
      exit 1
   fi
}

#------------------------------------------------------------------------------
# is_edge ($ServerID, $RootDir)
#
# Determine if a given ServerID is an edge server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if an edge server, NO otherwise.
#
#------------------------------------------------------------------------------
is_edge () {
   # Print YES when the db.server record for ServerID ($1) in RootDir
   # ($2, default $P4ROOT) has the 4096 bit set in its Services field;
   # print NO otherwise, including when no record is found.
   local ServerID=${1:-Unset}
   local RootDir=${2:-$P4ROOT}
   local Services=
   local Answer=NO

   # Dump db.server, keep the record for this ServerID, and take the
   # 13th '@'-delimited field, which holds the Services value.
   Services=$($P4DBIN -r $RootDir -J off -L /dev/null -k db.server -jd - 2>&1 |\
      $GREP "@db.server@ @${ServerID}@" | $CUT -d '@' -f 13)

   # Bitwise test against 4096; an empty value leaves the answer at NO.
   if [[ -n "$Services" ]] && [[ "$(($Services & 4096))" -gt 0 ]]; then
      Answer=YES
   fi

   echo $Answer
}

#------------------------------------------------------------------------------
# Set variables for use in various scripts:
# RC=path to the init scripts
# OFFLINE_DB=path to offline db directory
# EDGESERVER=1 if this is an edge server, 0 otherwise.
#
# This must be called after loading the standard shell environment by
# doing:
# source /p4/common/bin/p4_vars N
#
# This sets P4HOME, SERVERID, etc. needed by this function.
#------------------------------------------------------------------------------
set_vars () {
   # Sets RC (init script path) and OFFLINE_DB from P4HOME/SDP_INSTANCE, and
   # exports EDGESERVER: 1 when is_edge reports YES for $SERVERID, else 0.
   #
   # ${SERVERID:-} lets the "no ServerID" case fall through to EDGESERVER=0
   # instead of aborting under 'set -u' when SERVERID is unset.
   RC="$P4HOME/bin/p4d_${SDP_INSTANCE}_init"
   OFFLINE_DB="${P4HOME}/offline_db"

   export EDGESERVER=0
   if [[ -n "${SERVERID:-}" && "$(is_edge "$SERVERID")" == YES ]]; then
      EDGESERVER=1
   fi
}

#------------------------------------------------------------------------------
# Check if user is running as required OS user.
#------------------------------------------------------------------------------
check_uid () {
   # Compare the current user name (id -un) with $OSUSER; call die() on a
   # mismatch.  'user' is left set as a global.
   user=$(id -un)
   [[ ${user} == ${OSUSER} ]] || die "Must be run by user: ${OSUSER}. Abort!"
}

#------------------------------------------------------------------------------
# Function log() - echo message to logfile or stdout.
#
# If $LOGFILE is defined, write message to the log file only; nothing goes to
# stdout.  Prepend a datestamp.
# If $LOGFILE isn't defined, just echo to stdout, without a timestamp.
# In all cases, support '-e' formatting.
# Input:
# $1 - message to log (must be quoted).
#------------------------------------------------------------------------------
log () {
   # With no $LOGFILE defined, echo the message to stdout and stop.
   if [[ ${LOGFILE:-Unset} == Unset ]]; then
      echo -e "$@"
      return
   fi

   # Otherwise append one line to $LOGFILE: the date (no newline), then the
   # script name and the message.  Nothing is written to stdout.
   echo -n $(date)   2>&1 >> "$LOGFILE"
   echo -e " $0: $@" 2>&1 >> "$LOGFILE"
}

#------------------------------------------------------------------------------
# Decide depending on our mail utility, how to specify sender (if we need to).
# Mail on some platforms sets sender by default.
# If the mail utility returns what looks like a version identifier
# when given the '-V' flag, use a '-S' flag.  If it does not return a
# version identifier, don't set a mail sender option.
# Allow GNU Mailutils alternative flag instead.
#------------------------------------------------------------------------------
get_mail_sender_opt () {
   # Print the option string that sets the sender for $SDPMAIL, or an empty
   # line when $MAILFROM is empty or the '-V' output is not recognized.
   local sender_flag=
   local version_text=
   local version_re='^[0-9]+\.[0-9]'

   if [[ -n "$MAILFROM" ]]; then
      version_text=$($SDPMAIL -V 2>&1)
      case "$version_text" in
         *"GNU Mailutils"*)
            # GNU Mailutils takes the sender as an added From: header.
            sender_flag="-aFrom:$MAILFROM"
            ;;
         *)
            # Output starting with something like "12.5" gets the -S form.
            [[ "$version_text" =~ $version_re ]] && sender_flag="-S from=$MAILFROM"
            ;;
      esac
   fi

   echo "$sender_flag"
}

#------------------------------------------------------------------------------
# Email the log file named by $LOGFILE.
#------------------------------------------------------------------------------
mail_log_file () {
   # Send the contents of $LOGFILE to $MAILTO via $SDPMAIL.
   # $1 - subject line for the message.
   local subject=$1
   local mail_sender_opt

   # Sender option may be empty; it is left unquoted below so that
   # "-S from=..." is passed as two words.
   mail_sender_opt=$(get_mail_sender_opt)

   $SDPMAIL -s "$subject" $mail_sender_opt $MAILTO < "$LOGFILE"
}

#------------------------------------------------------------------------------
# Function die() - log message, send email, and exit.
# If $LOGFILE is defined, write message to the log file, email log,
# and exit.
# If $LOGFILE is not defined, write message to the stdout, and skip
# email.
# If in terminal session, display message to stderr as well.
#------------------------------------------------------------------------------
die () {
   # Record the failure through log() with a subject-style prefix.
   log "ERROR!!! - $HOSTNAME $P4SERVER $0: $@"

   # Mail the log only when a log file is in use.
   [[ ${LOGFILE:-Unset} == Unset ]] || mail_log_file "ERROR!!! - $HOSTNAME $P4SERVER $0: $@"

   # When stdin is a terminal, repeat the message on stderr.
   if tty >/dev/null; then
      echo -e "$@" >&2
   fi

   # Remove the checkpoint semaphore, then exit with status 1.
   rm -f ${LOGS}/ckp_running.txt
   exit 1
}

#------------------------------------------------------------------------------
# Write a semaphore file, $LOGS/ckp_running.txt.  This file is written at
# the start of processing, and removed upon successful completion.  It
# prevents multiple concurrent operations from being launched accidentally
# e.g. by multiple human admins, or a human inadvertently competing with a
# cron job.
#
# It is also intended to get human admins to determine the root cause of
# checkpoint failures.
#------------------------------------------------------------------------------
ckp_running() {
   # Call die() if the semaphore file already exists; otherwise create it.
   local SemaphoreFile=${LOGS}/ckp_running.txt

   [[ ! -f $SemaphoreFile ]] || \
      die "Last checkpoint not complete. Check the backup process or contact support."

   echo "Checkpoint running." > $SemaphoreFile
}

#------------------------------------------------------------------------------
# Remove the ckp_running.txt semaphore file when checkpoint processing is
# complete.
#------------------------------------------------------------------------------
ckp_complete() {
   # Delete the semaphore file; 'rm -f' stays quiet if it is already gone.
   local SemaphoreFile=${LOGS}/ckp_running.txt
   rm -f $SemaphoreFile
}

#------------------------------------------------------------------------------
# Ensure key directories are writable. Abort if they are not.
#------------------------------------------------------------------------------
check_dirs () {
   # Verify that $OFFLINE_DB, $CHECKPOINTS and $LOGS (plus the per-ServerID
   # checkpoints dir when EDGESERVER is 1) exist and are writable.  Each bad
   # directory is logged; if any is bad, die() is called.
   local -i dirsOK=1
   local -a dirList=("$OFFLINE_DB" "$CHECKPOINTS" "$LOGS")
   local dir=

   if [[ $EDGESERVER -eq 1 ]]; then
      dirList+=("${CHECKPOINTS}.${SERVERID#p4d_}")
   fi

   for dir in "${dirList[@]}"; do
      if [[ ! -d "$dir" || ! -w "$dir" ]]; then
         log "Error: Dir $dir does not exist or is not writable."
         dirsOK=0
      fi
   done

   # The message must be handed to die(); a bare string here would be run as
   # a command and the function would return without aborting.
   [[ $dirsOK -eq 1 ]] || die "Some expected dirs are missing or not writable. Aborting."
}

#------------------------------------------------------------------------------
# Add the output of 'p4 diskspace' to the log file.
#------------------------------------------------------------------------------
check_disk_space () {
   # Announce the check, then append the output (stdout and stderr) of
   # "$P4BIN diskspace" to $LOGFILE.
   log "Checking disk space..."
   { $P4BIN diskspace; } >> "$LOGFILE" 2>&1
}

#------------------------------------------------------------------------------
# Check value of journal; ensure it is an integer.
#------------------------------------------------------------------------------
check_journalnum () {
   # $1 - journal counter value to validate ("Unset" when omitted).
   # Calls die() unless the value consists solely of decimal digits.
   local JNLNUM=${1:-Unset}
   re='^[0-9]+$'
   [[ $JNLNUM =~ $re ]] || \
      die "The journal counter value [$JNLNUM] is invalid. It must be numeric."
}

#------------------------------------------------------------------------------
# Determine journal counter by checking counter in db.counters.
#------------------------------------------------------------------------------
get_journalnum () {
   # Sets JOURNALNUM from the 'journal' counter in $P4ROOT/db.counters and
   # CHECKPOINTNUM to JOURNALNUM + 1.
   local counterValue=

   # Default of 0 covers a new/empty data set: db.counters missing,
   # unreadable, or holding no journal counter.
   JOURNALNUM=0

   if [[ -r "$P4ROOT/db.counters" ]]; then
      counterValue=$($P4DBIN -r $P4ROOT -k db.counters -jd - 2>&1 | grep @journal@ | cut -d '@' -f 8)
      if [[ -n "$counterValue" ]]; then
         check_journalnum "$counterValue"
         JOURNALNUM="$counterValue"
      fi
   fi

   # On an edge server the journal has already rotated, so step back one so
   # that the correct journal file and checkpoint number are used.
   if [[ $EDGESERVER -eq 1 ]]; then
      JOURNALNUM=$(($JOURNALNUM - 1))
   fi

   CHECKPOINTNUM=$(($JOURNALNUM + 1))
}

#------------------------------------------------------------------------------
# Verify that the offline databases are usable by checking the existence
# of a 'offline_db_usable.txt' file that is written only when databases
# are in a known-good state, following successful recovery from a checkpoint.
#------------------------------------------------------------------------------
check_offline_db_usable () {
   # The marker file must be present ...
   [[ -f $OFFLINE_DB/offline_db_usable.txt ]] || \
      die "Offline database not in a usable state. Check the backup process."

   # ... and so must the db.counters table itself.
   [[ -f $OFFLINE_DB/db.counters ]] || \
      die "Offline database not found. Consider creating it with live_checkpoint.sh. Be aware that it locks the live system and can take a long time! Abort!"
}

#------------------------------------------------------------------------------
# Determine journal counter in offline databases.
#------------------------------------------------------------------------------
get_offline_journal_num () {
   # Sets OFFLINEJNLNUM to the 'journal' counter stored in the offline
   # db.counters, after confirming the offline db is usable.
   check_offline_db_usable

   if ! OFFLINEJNLNUM=$($P4DBIN -r $OFFLINE_DB -jd - db.counters 2>&1 |
         grep '@journal@' |
         cut -d "@" -f 8 2>> $LOGFILE); then
      die "Cannot get the offline journal number. Abort!"
   fi

   check_journalnum $OFFLINEJNLNUM
   log "Offline journal number is: $OFFLINEJNLNUM"
}

#------------------------------------------------------------------------------
# Remove old checkpoint and journal files, keeping the most recent ones as
# set by KEEPCKPS and KEEPJNLS.
#------------------------------------------------------------------------------
remove_old_checkpoints_and_journals () {
   declare CheckpointsDir=
   declare StandbyReplicaJournalsDir=
   declare FilePrefix=
   declare CleanupDir=

   # KEEPCKPS of 0 turns the whole cleanup off.
   if [[ $KEEPCKPS -eq 0 ]]; then
      log "Skipping cleanup of old checkpoints because KEEPCKPS is set to 0."
      return
   fi

   log "Deleting obsolete checkpoints and journals. Keeping latest $KEEPCKPS  per KEEPCKPS setting in p4_vars."

   # Default: ckp/jnl files starting like /p4/N/checkpoints/p4_N.
   CheckpointsDir="${CHECKPOINTS}"
   FilePrefix="${P4SERVER}"
   if [[ $EDGESERVER -ne 0 ]]; then
      # Edge server (example ServerID=p4d_edge_nyc):
      # /p4/N/checkpoints.edge_nyc/p4_N.edge_nyc
      CheckpointsDir="${CHECKPOINTS}.${SERVERID#p4d_}"
      FilePrefix="${P4SERVER}.${SERVERID#p4d_}"
   fi
   StandbyReplicaJournalsDir="${P4HOME}/journals.rep"

   # The same pruning applies to the checkpoints dir and to journals.rep;
   # a directory that does not exist is skipped.
   for CleanupDir in "$CheckpointsDir" "$StandbyReplicaJournalsDir"; do
      [[ -d "$CleanupDir" ]] || continue

      # Checkpoints, compressed or not.  KEEPCKPS is doubled because each
      # checkpoint has a companion md5 file.
      for I_LOGFILE in $(ls -t ${CleanupDir}/${FilePrefix}.ckp.* 2>/dev/null | $AWK "NR > ($KEEPCKPS * 2)"); do
         log "rm -f $I_LOGFILE"
         rm -f "$I_LOGFILE"
      done

      # Journals use KEEPJNLS, allowing separate rotation at a higher
      # frequency.
      for I_LOGFILE in $(ls -t ${CleanupDir}/${FilePrefix}.jnl.* 2>/dev/null | $AWK "NR > $KEEPJNLS"); do
         log "rm -f $I_LOGFILE"
         rm -f "$I_LOGFILE"
      done
   done
}

#------------------------------------------------------------------------------
# Shutdown p4d using the standard init script. Log the shutdown activity.
#------------------------------------------------------------------------------
stop_p4d () {
   # Run "$RC stop", capturing its stdout and stderr in $LOGFILE, with a log
   # entry before and after.
   log "Shutting down the p4 server"
   { $RC stop; } >> "$LOGFILE" 2>&1
   log "p4 stop finished -- p4 is down now."
}

#------------------------------------------------------------------------------
# Start p4d using the standard init script. Log the startup activity.
# Return status indicates whether the server started successfully or not.
#------------------------------------------------------------------------------
start_p4d () {
   # Run "$RC start" (output to $LOGFILE), pause briefly, then probe the
   # server with 'p4 info'.  Returns 0 if the probe succeeds, 1 otherwise.
   log "Starting the p4 server"
   $RC start >> "$LOGFILE" 2>&1
   sleep 3 # Give it a few seconds to start up

   # A failing 'p4 info' is taken to mean the server did not come up.
   if ! $P4BIN -u $P4USER -p $P4PORT info >/dev/null 2>&1; then
      log "Error: Server does not appear to have started."
      return 1
   fi

   log "Server restarted successfully - p4 should be back up now."
   return 0
}

#------------------------------------------------------------------------------
# Call 'p4d -jj' to rotate the current/active journal file on the master
# server, starting a fresh new P4JOURNAL file.
#
# In a distributed topology with edge servers, this function
# should be called on the master/commit server.
#------------------------------------------------------------------------------
truncate_journal () {
   declare CheckpointFile="${CHECKPOINTS}/${P4SERVER}.ckp.${CHECKPOINTNUM}.gz"
   declare JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${JOURNALNUM}"

   # Only the master/commit server rotates the journal here.
   if [[ $EDGESERVER -ne 0 ]]; then
      log "Warning: truncate_journal () function should not be called on an edge server. Ignoring."
      return
   fi

   # Refuse to continue if the files about to be produced already exist.
   if [[ -f "$CheckpointFile" ]]; then
      die "Checkpoint $CheckpointFile already exists, check the backup process."
   fi
   if [[ -f "$JournalFile" ]]; then
      die "Journal $JournalFile already exists, check the backup process."
   fi

   log "Truncating journal..."
   # 'p4d -jj' does a copy-then-delete, instead of a simple mv.
   # During 'p4d -jj' the perforce server will hang the responses to clients,
   # this should be for a very short period of time even for large data
   # sets, as the journal represents a single day of metadata.
   $P4CBIN/p4login -p $P4MASTERPORT
   # Curly braces capture output of 'time'.
   if ! { time $P4BIN -p $P4MASTERPORT admin journal ${CHECKPOINTS}/${P4SERVER}; } >> "$LOGFILE" 2>&1; then
      die "Journal rotation failed. Abort!"
   fi

   # Poll every 5 seconds until the rotated journal file shows up in the
   # checkpoints directory.  There is no timeout.
   test=1
   until [ $test = 0 ]; do
      sleep 5
      [ -f "$JournalFile" ] && test=0
   done

   $P4CBIN/p4login
}

#------------------------------------------------------------------------------
# Similar to truncate_journal() above, p4d_truncate_journal() is intended to be
# usable from the p4d_base init script, to allow journal rotation on p4d
# start.
#------------------------------------------------------------------------------
p4d_truncate_journal () {
   declare JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${JOURNALNUM}"

   # Nothing to do on an edge server.
   [[ $EDGESERVER -eq 0 ]] || return 0

   # Refuse to continue if the numbered journal is already there.
   if [[ -f "$JournalFile" ]]; then
      die "Journal $JournalFile already exists, check the backup process."
   fi

   log "Rotating journal prior to starting p4d."
   # Rotate with 'p4d -jj' directly against P4ROOT; output goes to $LOGFILE.
   if ! $P4DBIN -r $P4ROOT -J $P4JOURNAL -jj ${CHECKPOINTS}/${P4SERVER} >> "$LOGFILE" 2>&1; then
      die "Failed to rotate journal. Aborting p4d server start."
   fi
}

#------------------------------------------------------------------------------
# Replay any and all numbered journal files into the offline databases.
#------------------------------------------------------------------------------
replay_journals_to_offline_db () {
   # Default naming: ckp/jnl files start like /p4/N/checkpoints/p4_N.
   local CheckpointsDir="${CHECKPOINTS}"
   local FilePrefix="${P4SERVER}"
   local NumberedJournal=

   log "Replay any unreplayed journals to the offline database."

   if [[ $EDGESERVER -ne 0 ]]; then
      # Edge server naming (example ServerID=p4d_edge_nyc):
      # /p4/N/checkpoints.edge_nyc/p4_N.edge_nyc
      CheckpointsDir="${CHECKPOINTS}.${SERVERID#p4d_}"
      FilePrefix="${P4SERVER}.${SERVERID#p4d_}"
   fi

   # Replay each numbered journal from OFFLINEJNLNUM through JOURNALNUM,
   # inclusive, calling die() on the first failure.
   for (( j=$OFFLINEJNLNUM; $j <= $JOURNALNUM; j++ )); do
      NumberedJournal="${CheckpointsDir}/${FilePrefix}.jnl.${j}"
      log "Replay journal $NumberedJournal to offline db."
      # Curly braces capture output of 'time'.
      if ! { time $P4DBIN -r $OFFLINE_DB -jr -f $NumberedJournal; } >> "$LOGFILE" 2>&1; then
         die "Offline journal replay failed. Abort!"
      fi
   done
}

#------------------------------------------------------------------------------
# Replay the live, active P4JOURNAL file into the offline databases.
#------------------------------------------------------------------------------
replay_active_journal_to_offline_db () {
   # Replay $P4JOURNAL into $OFFLINE_DB with 'p4d -jr -f'; call die() if the
   # replay fails.
   log "Replay active journal to offline db."
   # Curly braces capture output of 'time'.
   if ! { time $P4DBIN -r $OFFLINE_DB -jr -f ${P4JOURNAL}; } >> "$LOGFILE" 2>&1; then
      die "Active Journal replay failed. Abort!"
   fi
}

#------------------------------------------------------------------------------
# Recreate offline databases from the latest checkpoint.
#------------------------------------------------------------------------------
recreate_offline_db_files () {
   # Rebuild $OFFLINE_DB from the newest checkpoint (chosen by the most
   # recently modified *.md5 file), then write offline_db_usable.txt.
   # Calls die() when no checkpoint exists, when no md5 file is found, when
   # the checkpoint file is unreadable, or when the restore fails.
   local CheckpointsDir=
   local FilePrefix=
   local LastCheckpointMD5=
   local LastCheckpoint=
   local -a ExistingCheckpoints=()

   if [[ $EDGESERVER -eq 0 ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   else
      # Refer to ckp/jnl files starting like (example ServerID=p4d_edge_nyc):
      # /p4/N/checkpoints.edge_nyc/p4_N.edge_nyc
      CheckpointsDir="${CHECKPOINTS}.${SERVERID#p4d_}"
      FilePrefix="${P4SERVER}.${SERVERID#p4d_}"
   fi

   # Stop before touching the offline db when there is no compressed
   # checkpoint to restore from.  The glob is expanded into an array because
   # [[ -f ... ]] does not expand patterns; an unmatched pattern stays
   # literal (or the array is empty under nullglob) and fails the -f test.
   ExistingCheckpoints=( "${CheckpointsDir}/${FilePrefix}".ckp.*.gz )
   if [[ ! -f "${ExistingCheckpoints[0]:-}" ]]; then
      ckp_complete
      die "No checkpoints found - run live_checkpoint.sh"
   fi

   # Clear the usable marker first, so a partial rebuild is never mistaken
   # for a good offline db.
   rm -f "${OFFLINE_DB}/offline_db_usable.txt" >> "$LOGFILE" 2>&1
   rm -f "${OFFLINE_DB}"/db.* >> "$LOGFILE" 2>&1
   rm -f "${OFFLINE_DB}"/save/db.* >> "$LOGFILE" 2>&1

   LastCheckpointMD5=$(ls -t "${CheckpointsDir}/${FilePrefix}".ckp.*.md5 2>> "$LOGFILE" | head -1)
   [[ -n "$LastCheckpointMD5" ]] || \
      die "Could not find *.md5 file for latest checkpoint. Abort!"

   # Account for the idiosyncrasy that MD5 files for checkpoints may look
   # like p4_N.ckp.gz.md5 or p4_N.ckp.md5.
   if [[ "$LastCheckpointMD5" == *".gz.md5" ]]; then
      LastCheckpoint="${LastCheckpointMD5%.md5}"
   else
      LastCheckpoint="${LastCheckpointMD5%.md5}.gz"
   fi

   [[ -r "$LastCheckpoint" ]] || \
      die "Missing last checkpoint file: $LastCheckpoint. Abort!"

   log "Recovering from last full checkpoint, $LastCheckpoint."
   # Curly braces capture output of 'time'.
   { time $P4DBIN -r "$OFFLINE_DB" -jr -z "${LastCheckpoint}"; } >> "$LOGFILE" 2>&1 || { die "Restore of checkpoint to $OFFLINE_DB failed!"; }
   echo "Offline db file restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
}

#------------------------------------------------------------------------------
# Take a live checkpoint from db.* files in P4ROOT.
#------------------------------------------------------------------------------
checkpoint () {
   local CheckpointsDir=
   local FilePrefix=

   log "Create a new checkpoint from live db files in $P4ROOT."

   # Live checkpoints are refused on edge servers.
   if [[ "$EDGESERVER" -ne 0 ]]; then
      die "Checkpoints may not be run on an edge server."
   else
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   fi

   # Run 'p4d -jc -Z' against P4ROOT, logging timing and output.
   # Curly braces capture output of 'time'.
   if ! { time $P4DBIN -r $P4ROOT -jc -Z ${CheckpointsDir}/${FilePrefix}; } >>"$LOGFILE" 2>&1; then
      die "ERROR - New checkpoint failed!"
   fi
}

#------------------------------------------------------------------------------
# Take a checkpoint from the ROOTDIR, typically either /p4/N/root or
# /p4/N/offline_db.
#------------------------------------------------------------------------------
dump_checkpoint () {
   # Default naming: ckp/jnl files start like /p4/N/checkpoints/p4_N.
   declare CheckpointsDir="${CHECKPOINTS}"
   declare FilePrefix="${P4SERVER}"
   declare NewCheckpoint=
   declare NewCheckpointMD5=

   log "Dump out new checkpoint from db files in $ROOTDIR."

   if [[ $EDGESERVER -ne 0 ]]; then
      # Edge server naming (example ServerID=p4d_edge_nyc):
      # /p4/N/checkpoints.edge_nyc/p4_N.edge_nyc
      CheckpointsDir="${CHECKPOINTS}.${SERVERID#p4d_}"
      FilePrefix="${P4SERVER}.${SERVERID#p4d_}"
   fi

   NewCheckpoint=${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}.gz
   NewCheckpointMD5=${NewCheckpoint}.md5

   # If both the checkpoint and its md5 file are already readable, leave
   # them alone and return.
   if [[ -r "$NewCheckpoint" && -r "$NewCheckpointMD5" ]]; then
      log "\nWarning: Skipping generation of existing checkpoint $NewCheckpoint.\nVerified MD5 file exists: $NewCheckpointMD5."
      return
   fi

   # Dump a compressed checkpoint from $ROOTDIR with 'p4d -jd -z'.
   # Curly braces capture output of 'time'.
   if ! { time $P4DBIN -r $ROOTDIR -jd -z ${NewCheckpoint}; } >> "$LOGFILE" 2>&1; then
      die "New checkpoint dump failed!"
   fi
}

#------------------------------------------------------------------------------
# Compare journal numbers between live and offline databases, to ensure
# they can be safely swapped out.
#------------------------------------------------------------------------------
compare_journal_numbers () {
   # The offline db must carry its usable marker and a db.counters table.
   [[ -f $OFFLINE_DB/offline_db_usable.txt ]] || \
      die "Offline database not in a usable state. Check the backup process."
   [[ -f $OFFLINE_DB/db.counters ]] || \
      die "Offline database not found. Consider creating it with live_checkpoint.sh. Be aware that it locks the live system and can take a long time! Abort!"

   # Journal counter of the offline database.
   local _OFFLINEJNLNUM=$($P4DBIN -r $OFFLINE_DB -jd - db.counters 2>&1 |
      grep '@journal@' |
      cut -d "@" -f 8 2>> $LOGFILE) || die "Cannot get $OFFLINE_DB journal number. Abort!"
   check_journalnum $_OFFLINEJNLNUM

   # Journal counter of the root database.
   [[ -f $P4ROOT/db.counters ]] || \
      die "$P4ROOT database not found. Something is seriously wrong since the server was just running a minute ago! Contact support@perforce.com"
   local _JNLNUM=$($P4DBIN -r $P4ROOT -jd - db.counters 2>&1 |
      grep '@journal@' |
      cut -d "@" -f 8 2>> $LOGFILE) || die "Cannot get $P4ROOT journal number. Abort!"
   check_journalnum $_JNLNUM

   # Fine unless the root db is ahead of the offline db.
   [[ $_JNLNUM -gt $_OFFLINEJNLNUM ]] || return 0

   log "$P4ROOT journal number is: $_JNLNUM"
   log "$OFFLINE_DB journal number is: $_OFFLINEJNLNUM"
   die "$OFFLINE_DB journal number is less than $P4ROOT, cannot switch."
}

#------------------------------------------------------------------------------
# Swap out live db.* database files in P4ROOT with those in offline_db.
#------------------------------------------------------------------------------
switch_db_files () {
   # Swap the live and offline databases by exchanging the targets of the
   # $P4ROOT and $OFFLINE_DB symlinks.  Statement order matters throughout.

   # Check the offline and root journal numbers before switching;
   # compare_journal_numbers aborts if the root number is greater than the
   # offline number.
   compare_journal_numbers
   log "Switching root and offline_db links..."
   # Keep the current live db.* files in P4ROOT/save, replacing any previous
   # saved set.
   [[ -d ${P4ROOT}/save ]] || mkdir -p ${P4ROOT}/save
   rm -f ${P4ROOT}/save/db.* >> $LOGFILE 2>&1
   mv ${P4ROOT}/db.* ${P4ROOT}/save >> $LOGFILE 2>&1
   # Carry the non-db files (license, rdb.lbr, state*, server.id) over to
   # the directory that is about to become the live root.
   mv ${P4ROOT}/license* ${OFFLINE_DB} >> $LOGFILE 2>&1
   mv ${P4ROOT}/rdb.lbr ${OFFLINE_DB} >> $LOGFILE 2>&1
   mv ${P4ROOT}/state* ${OFFLINE_DB} >> $LOGFILE 2>&1
   mv ${P4ROOT}/server.id ${OFFLINE_DB} >> $LOGFILE 2>&1
   # Remove the usable marker from the directory that is about to go live.
   rm -f ${OFFLINE_DB}/offline_db_usable.txt
   # Read both symlink targets before unlinking either link.
   OLDBLNK=$(readlink $OFFLINE_DB)
   ROOTLNK=$(readlink $P4ROOT)
   unlink $OFFLINE_DB
   unlink $P4ROOT
   # Re-create the links crosswise: P4ROOT now points at the old offline
   # target, and OFFLINE_DB at the old root target.
   ln -s $OLDBLNK $P4ROOT >> $LOGFILE 2>&1 || die "Link of $OLDBLNK to $P4ROOT failed."
   ln -s $ROOTLNK $OFFLINE_DB >> $LOGFILE 2>&1 || die "Link of $ROOTLNK to $OFFLINE_DB failed."
}

#------------------------------------------------------------------------------
# Rotate specified log files, and compress with gzip.
#------------------------------------------------------------------------------
rotate_log_file () {
   # $1 - log file name, relative to $LOGS (or absolute).
   # $2 - optional; when non-empty, the rotated file is gzipped.
   #
   # Renames the log to <name>.<YYYY-MM-DD_HH-MM-SS>.  Works from inside
   # $LOGS and returns to the previous directory afterwards.  A log that
   # does not exist is ignored.
   cd "$LOGS"
   ROTATE_LOGNAME=$1
   GZ_EXT=${2:-}
   LOGID=$(date +'%Y-%m-%d_%H-%M-%S')

   if [[ -f ${ROTATE_LOGNAME} ]]; then
      mv -f ${ROTATE_LOGNAME} ${ROTATE_LOGNAME}.${LOGID} >> $LOGFILE 2>&1
      if [[ -n "$GZ_EXT" ]]; then
         gzip ${ROTATE_LOGNAME}.${LOGID} >> $LOGFILE 2>&1
      fi
   fi

   cd - > /dev/null
}

#------------------------------------------------------------------------------
# At the start of each run for live_checkpoint.sh, daily_checkpoint.sh, and
# recreate_db_checkpoint.sh, before *any* logging activity occurs, rotate the
# logs from the most recent prior run, always named "checkpoint.log" or "log".
#------------------------------------------------------------------------------
rotate_last_run_logs () {
   local ServiceLog=

   # The current script's own prior log is rotated without compression.
   rotate_log_file $LOGFILE

   # Prior server, broker and audit logs are rotated and gzipped.
   for ServiceLog in "log" "p4broker.log" "audit.log"; do
      rotate_log_file "$ServiceLog" ".gz"
   done
}

#------------------------------------------------------------------------------
# Remove log files matching a specified name prefix, preserving a specified
# number of the recent logs.
#------------------------------------------------------------------------------
remove_log_files () {
   # $1 - file name prefix; every file matching "<prefix>*" is a candidate.
   # $2 - how many of the most recently modified matches to keep.
   REMOVE_LOGNAME=$1
   KEEPNUM=$2
   local StaleLogs=

   # List matches newest first and keep only the names past the first
   # KEEPNUM entries; those are the ones to delete.
   StaleLogs=$(ls -t ${REMOVE_LOGNAME}* 2>/dev/null | $AWK "NR > $KEEPNUM")

   for I_LOGFILE in $StaleLogs; do
      log "rm -f $I_LOGFILE"
      rm -f "$I_LOGFILE"
   done
}

#------------------------------------------------------------------------------
# Remove old logs.
#------------------------------------------------------------------------------
remove_old_logs () {
   local LogPrefix=

   # All pruning is done from inside $LOGS.
   cd "$LOGS"

   # Checkpoint logs follow KEEPJNLS rather than KEEPLOGS.
   if [[ $KEEPJNLS -ne 0 ]]; then
      log "Deleting old checkpoint logs.  Keeping latest $KEEPJNLS, per KEEPJNLS setting in p4_vars."
      remove_log_files "checkpoint.log" $KEEPJNLS
   else
      log "Skipping cleanup of old checkpoint logs because KEEPJNLS is set to 0."
   fi

   # Server and utility logs follow KEEPLOGS.
   if [[ $KEEPLOGS -ne 0 ]]; then
      log "Deleting old server logs.  Keeping latest $KEEPLOGS, per KEEPLOGS setting in p4_vars."
      for LogPrefix in "log" "p4broker.log" "audit.log" "sync_replica.log" \
         "recreate_offline_db.log" "upgrade.log" "p4login" "p4verify.log"; do
         remove_log_files "$LogPrefix" $KEEPLOGS
      done
   else
      log "Skipping cleanup of old server logs because KEEPLOGS is set to 0."
   fi

   cd - > /dev/null
}

#------------------------------------------------------------------------------
# Set the SDP Checkpoint counter to indicate last successful SDP checkpoint
# operation.
#------------------------------------------------------------------------------
set_counter() {
   # Log in, then set counter LastSDPCheckpoint.$SERVERID to the current
   # time, formatted as epoch seconds followed by a readable date.
   local Stamp=

   $P4CBIN/p4login
   Stamp=$(date +'%s (%Y/%m/%d %H:%M:%S %z %Z)')
   $P4BIN -u $P4USER -p $P4PORT counter LastSDPCheckpoint.$SERVERID "$Stamp" > /dev/null
}

#	Change	User	Description
#1	23960	noe_gonzalez	"Forking branch Dev of perforce-software-sdp to noe_gonzalez-sdp."
# //guest/perforce_software/sdp/dev/Server/Unix/p4/common/bin/backup_functions.sh
#68	23848	Robert Cowham	Missed a fix for the crontab spamming when track=1 If you do p4d -jd - you need to pipe 2>&1
#67	23639	Robert Cowham	Remove trailing spaces on all lines - important to make line continuations work.
#66	23637	Robert Cowham	Avoid spamming emails from crontab when track=1
#65	23429	Robert Cowham	Fix a couple of shellcheck warnings
#64	23266	C. Thomas Tyler	Fixes and Enhancements: * Enabled daily_checkpoint.sh operate on edge servers, to keep /p4/N/offline_db current on those hosts for site-local recovery w/o requiring a site-local replica (though having a site-local replica can still be useful). * Disabled live_checkpoint.sh for edge servers. * More fully support topologies using edge severs, in both geographically distributed and horizaontal scaling "wokspace server" solutions. * Fix broken EDGESERVER value definition. * Modified name of SDP counter that gets set when a checkpoint is taken to incorporate ServerID, so now the counter name will look like lastSDPCheckpoint.master.1, or lastSDPCheckpoint.p4d_edge_sfo, rather than just lastSDPCheckpoint. There will be multiple such counters in a topology that uses edge servers, and/or which takes checkpoints on replicas. * Added comments for all functions. For the master server, journalPrefix remains: /p4/N/checkpoints/p4_N The /p4/N/checkpoints is reserved for writing by the master/commit server only. For non-standby (possibly filtered) replicas and edge serves, journalPrefix is: /p4/N/checkpoints.<ShortServerID>/p4_N.<ShortServerID> Here, ShortServerID is just the ServerID with the 'p4d_' prefix trimmed, since it is redundant in this context. See mkrep.sh, which enshines a ServerID (server spec) naming standard, with values like 'p4d_fr_bos' (forwarding replica in Boston) and p4d_edge_blr (Edge server in Bangalore). So the journalPrefix for the p4d_edge_bos replica would be: /p4/N/checkpoints.edge_bos/p4_N.edge_bos For "standby" (aka journalcopy) replicas, journalPrefix is set to /p4/N/journals.rep. which is written to the $LOGS volume, due to the nature of standby replicas using journalPrefix to write active server logs to pre-rotated journals. Some take-away to be updated in docs: * The /p4/N/checkpoints folder must be reserved for checkpoints that originate on the master. 
It should be safe to rsync this folder (with --delete if desired) to any replica or edge server. This is consistent with the current SDP. * I want to change 'journals.rep' to 'checkpoints.<ShortServerID>' for non-standby replicas, to ensure that checkpoints and journals taken on those hosts are written to a volume where they are backed up. * In sites with multiple edge serves, some sharing achive files ('workspace servers'), multiple edge servers will share the same SAN. So we one checkpoints dir per ServerID, and we want that dir to be on the /hxdepots volume. Note that the journalPrefix for replicas was a fixed /p4/N/journals.rep. This was on the /hxlogs volume - a presumably fast-for-writes volume, but typically NOT backed up and not very large. This change puts it under /p4/N/checkpoints.* for edge servers and non-standby replicas, but ensures other replica types and edge servers can generate checkpoints to a location that is backed up and has plenty of storage capacity. For standby replicas only (which cannot be filtered), the journalPrefix remains /p4/N/journals.rep on the /hxlogs volume.
#63	23228	Russell C. Jackson (Rusty)	Adding back a fix that I put in revision 43 that somehow disappeared in revision 44.
#62	23031	C. Thomas Tyler	Simplified stop_p4d() in backup functions to just call the init script. Previously stop_p4d() had 'wait' logic to wait for p4d to stop before trying to stop with a 'kill' signal, but this is no longer needed since the modern init script (with logic in p4d_base) now does a 'kill' anyway, and also has the 'wait' logic and will exit only when p4d is well and truly down. In upgrade.sh, fixed issue where start/stop of p4broker and p4p went directly to the screen instead of the log. Also changed to call start/stop init scripts for p4d directly, just as for other services. Also enhanced upgrade.sh logging: * Log file name to incorporate SDP instance (redundant but nice). * Remove pesky ':' characters from the datestamp in log file name, as ':' chars in file names wreak havoc with 'scp' commands and require escaping on the command line. * Added log comments indicating which databases are being upgraded ($P4ROOT and $OFFLINE_DB). Also added comment explaining use of '-t' flag in 'p4d -xu' call for offline databases.
#61	23022	Sven Erik Knop	Simple fix for daily_checkpoint if the master server has its own server spec. If the master server has a server spec matching the serverid name, the script will check if the server is an edge server. This is done to avoid off-by-1 errors in the journal counter. The logic for the check was broken, though, resulting in checkpoints failing with "checkpoint.xxx.gz already exist, check setup" This fix avoids the problem. Bash script arithmetic is not as easy.
#60	22802	Russell C. Jackson (Rusty)	Fixed the test to check for the variable and do the bitwise mask test correctly.
#59	22800	Russell C. Jackson (Rusty)	Correct the logic on checking for an edge server. Removed () from the set_vars call that got in via a cut and paste.
#58	22679	Russell C. Jackson (Rusty)	Removed duplicate line that was producing a cron message.
#57	22658	Russell C. Jackson (Rusty)	Added line to remove the ckp_running.txt file when the checkpoint fails through the die function because the checkpoint is no longer running, and this file prevents the next checkpoint from running successfully.
#56	22633	Russell C. Jackson (Rusty)	Removed Debug and extra echo of journal number to eliminate cron messages.
#55	22387	Robert Cowham	Fix journal rotation off-by-one error Also make sure that -jj rotation specifies prefix.
#54	22345	C. Thomas Tyler	Another tweak.
#53	22343	C. Thomas Tyler	Fixed off-by-one error in new offline journal counter calculation logic. Bypassing pre-commit review until test suite runs clean again. #review-22344
#52	22277	C. Thomas Tyler	Debugging.
#51	22276	C. Thomas Tyler	Debugging.
#50	22274	C. Thomas Tyler	Fixed bug where detection of journal number fails for new/empty data set. Removed msg() and bail() functions, and changed approach to make the existing log() and die() functions behave correctly regardless of whether $LOGFILE is defined. If $LOGFILE is defined, log() silently write to the log file, otherwise writes to the screen (stdout). If $LOGFILE is defined, die() writes to the log file and sends an email, otherwise writes to the screen (stdout). If on a tty, error is duplicated in stderr. To Do: Improve in-code comments. Bypassing pre-commit review until tests pass. #review-22275
#49	22272	C. Thomas Tyler	Enhanced error message in check_journalnum() in backup_functions.sh. Bypassing pre-commit review until tests pass. #review-22273
#48	22270	C. Thomas Tyler	Attempting fix of build failure. Bypassing pre-commit review. #review-22271
#47	22250	C. Thomas Tyler	Further refinements to the new 'rotate journal on p4d start' change: * Fixed p4d_truncate_journal so it has less environment dependencies (e.g. doesn't depend on LOGFILE, etc.) and doesn't try sending email. * Introduced msg() and bail(), counterparts to log() and die() which don't try to write to LOGFILE and don't try to send email. * Added call to get_journalnum() before call to p4d_truncate_journal(). * Fixed logic in get_journalnum() so it gets the journal number w/o needing p4d to be up. * I think I fixed the syntax error in bitwise operator check when setting EDGE_SERVER. It works on a non-edge server (sets EDGESERVER=0). For now I have it doing an 'echo EDGESERVER=$EDGESERVER', but need to test that it correctly sets EDGESERVER=1 on an edge server. TO DO: Remove that 'echo EDGESERVER=$EDGESERVER' once we verify it correctly sets the value for $EDGESERVER. (Or not?)
#46	22239	Russell C. Jackson (Rusty)	Change set_vars to look up the edge server directly in the database so the server does not have to be on-line to check. Fix for Job: SDP-223
#45	22066	Russell C. Jackson (Rusty)	Added rotate for p4verify.log instead of just deleting the prior one.
#44	21624	C. Thomas Tyler	Fixed issue with mail sending a usage error on Ubuntu, which does not accept the '-V' flag to check the version.
#43	21580	Russell C. Jackson (Rusty)	Changed compare journal numbers function to only fail if root journal number is greater than offline_db. The not equal check was preventing the recreate_db_sync_replica.sh script from being used to fix a replica that was out of sync with the master.
#42	21322	Russell C. Jackson (Rusty)	#review-21323 Forgot server.id
#41	21318	Russell C. Jackson (Rusty)	#review-21319 Added commands to move license, rdb.lbr and state from P4ROOT to OFFLINE_DB before switching the links. Added command to remove the db.* files from offline_db/save as well before trying to recreate the offline database.
#40	21178	Russell C. Jackson (Rusty)	Change the SDP so that root and offline_db can be on different volumes and still accomplish a fast database recovery using recreate_db_checkpoint.sh and recreate_db_sync_replica.sh. This is done by switching the links now rather than moving the db files.
#39	20970	Russell C. Jackson (Rusty)	Changed to use the standard remove log function on the p4login log. We don't need to keep anymore than the keeplogs specified number of these logs around. It doesn't matter if they are all in the last hour or the last seven days. The only need for a p4login log is for debugging something not working. Anyone that needs long term tracking of logins can turn on the auth structured log to track the logins.
#38	20964	adrian_waters	Include removal of the p4login.*.log files in daily cleanup
#37	20940	Russell C. Jackson (Rusty)	Drop JOURNALNUM from the rotated log names because it forces you to wait to rotate the prior logs until you get the journal number and creates a problem where the error that you couldn't get the journal number ends up at the end of the previous days log file, and that is what gets email out. That causes confusion for the person trying to see what the error is. Moved all rotate_last_run_logs up to the point right after we set the environment.
#36	20822	C. Thomas Tyler	Change logic to use p4d init script only from /p4/N/bin. The current logic sets a variable essentially preferring the p4d init script in /etc/init.d, using the one in /p4/N/bin only if the one in /etc/init.d doesn't exist as a file (and would not be selected if it was a symlink). Reasons: * Referencing the file/symlink in /etc/init.d introduces potentially complex and confusing behavior. If there were a file in /etc/init.d rather than symlink'd, that could be bad if it doesn't get updated with new versions of the SDP, where stuff in /p4/N/bin should be reliably updated. * I just expect the SDP to always use its own files in /p4/N/bin, under direct control of the perforce user, rather than external references to it. In a proper SDP deployment on Linux, /etc/init.d should contain symlinks for SDP init scripts anyway. But why trust that if there's no need? * If there is a file in /etc/init.d and it's different than /p4/N/bin for some reason, we should prefer the one in /p4/N/bin. * The symlinks in /etc/init.d are outside the direct control of the perforce user, and could point to who-knows-where.
#35	20749	C. Thomas Tyler	Approved and committed, but I believe that the shared data setting is always set to false on the master and we should look at fixing that in another change. Enhanced p4login again. Improvements: Default behavior with no arguments gives the desired results. For example, if run on a master, we login on the super user P4USER to P4PORT. If run on a replica/edge and auth.id is set, we login P4USER to the P4TARGET port of the replica. All other login functionality, such as logging in the replication service user on a replica, logging in supplemental automation users, is now accessed via new flags. A usage message is now available via '-h' and '-man' options. The new synopsis is: p4login [<instance>] [-p <port> \| -service] [-automation] [-all] The <instance> parameter is the only non-flag positional parameter, and can be omitted if SDP_INSTANCE is already defined (as is typical when called by scripts). With this change, several other scripts calling either the 'p4login' script or 'p4 login' commands were normalized to call p4login as appropriate given the new usage. Reviewer Note: Review p4login first, then other files. Most changes are in p4login. In other scripts calling p4login, calls similar to: $P4BIN -u $P4USER -p $P4PORT login < /path/to/pwd are replaced with: $P4CBIN/p4login In other scripts calling p4login, calls similar to: $P4BIN -p $P4MASTERPORT login < /path/to/pwd are replaced with: $P4CBIN/p4login -p $P4MASTERPORT Note that, if auth.id is set, calling 'p4login' actually has the same behavior as 'p4login -p $P4MASTERPORT', since p4login called on a replica with auth.id set will just login to the master port anyway. Depending on intent, sometimes $P4BIN/p4login -service is used. == Misc Cleanup == In doing the cleanup: * Fixed a hard-coding-to-instance-1 bug in broker_rotate.sh. 
* Fixed an inconsistency in recreate_db_sync_replica.sh, where it did just a regular login rather than a login -a as done in other places (for compatibility with some multi-interface NIC card configs). == p4login Call Normalization == Code cleanup was done to normalize calls to p4login, such that: 1) the call starts with $P4CBIN/p4login (not the hard-coded path), and 2) logic to redirect stdout/stderr to /dev/null was removed, since it's not necessary with p4login. (And if p4login ever does generate any unwanted output, we only fix it in one place). == Tweak to instance_vars.template == This change includes a tweak to set P4MASTERPORT dynamically on a replica to ensure the value precisely matches P4TARGET for the given replica. This will reduce a source of problems when SSL is used, as it is particularly sensitive to the precise P4PORT values used, and will also help for environments which have not yet set auth.id. If the port cannot be determined dynamically, we fall back to the old logic using the assigned value. == Tweak to SDP_ALWAYS_LOGIN behavior == This used to default to 1, now it defaults to 0. At this point we should no longer need to force logins, and in fact doing so can get into a 'p4 login' hang situation with auth.id set. Best to avoid unnecessary logins if we already have a valid ticket. (I think the need to force a login may have gone away with p4d patches). == Obsolete Script == With this change, svclogin.sh is now obsolete. All it was doing was a few redundant 'p4 login' commands followed by a call to p4login anyway. == Testing == Our test suite doesn't fully cover this change, so additional manual testing was done in the Battle School lab environment.
#34	20637	Russell C. Jackson (Rusty)	Fixed the real cause of the problem and put the redirects to LOGFILE back. The actual cause of the problem was that we were rotating the sync_replica.log file twice within that function because of the call to rotate $LOGFILE and a second call to rotate "sync_replica.log". I removed the 2nd call to rotate the sync_replica.log.
#33	20636	Russell C. Jackson (Rusty)	Changed mv and gzip in rotate log to go to /dev/null to avoid stomping on the file we just rotated.
#32	20170	Russell C. Jackson (Rusty)	Moved password and users into the config directory to allow for instance specific users and passwords. Ran into a case where two different teams were sharing the same server hardware and needed this type of differentiation. Surprised that we haven't hit this sooner. Also defaulted mkdirs to use the numeric ports since this is the most common installation.
#31	19851	Robert Cowham	Check for usable offline_db before creating checkpoint work file. This avoids an error right at the start locking out the utility which will fix said error!
#30	19768	UnstoppableDrew	@tom_tyler @russell_jackson Bug fix for running p4master_run as root, and some comment header cleanup. Job 000543 p4master_run: Preserve original arguments list and use this when exec'ing as $OSUSER. backup_functions.sh: Add text about sourcing p4_vars yourself instead of using p4master_run. update_limits.py: Run p4login directly without p4master_run since p4login calls p4_vars now. everything else: Remove comment block about needing to run with p4master_run. Reword comment about SDP_INSTANCE since it is not always an integer value.
#29	19523	Russell C. Jackson (Rusty)	Added a KEEPJNLS variable to allow you to keep more journals than checkpoints in case you rotate the journal more frequently than you run checkpoints.
#28	19113	Russell C. Jackson (Rusty)	Changed name of daily_backup.sh to daily_checkpoint.sh Changed name of weekly_backup.sh to recreate_db_checkpoint.sh Updated crontabs with new names, and changed to run recreate_db_checkpoint on the 1st Sat. of Jan. and July. For most companies, this is a better practice than recreating weekly per discussion with Anton. Remove solaris crontab since Solaris is pretty much dead, and we don't test on it. Updated docs to reflect name changes, and did a little cleanup of other sections while I was in there.
#27	19105	Russell C. Jackson (Rusty)	This change uses p4 admin journal command against the master server to rotate the journal. Added a p4d_truncate_journal to use in weekly_backup.sh that still rotates via p4d. The purpose of this change is to allow you to run daily_backup.sh on a standby machine where you have a shared depotdata volume. If you want to use daily on the standby machine, you have to put offline_db on the shared depotdata volume which means you will NOT want to run weekly_backup.sh on the master very often, but that is basically what Anton is recommending now. I am currently testing this setup on a production environment, and if it works well, I will change mkdirs.sh to put offline_db on the depotdata volume by default and update the crontabs not to run weekly anymore. #review-19083
#26	18934	C. Thomas Tyler	Moved ckp_running.txt to $LOGS (/p4/n/logs) from /p4/n/checkpoints: * Avoids it getting rsync'd by sync_replica.sh or by common human admin rsyncs of the /p4/n/checkpoints dir. * It should be in a volume that's not shared. * Puts it in the logs directory where you go look when things break.
#25	18617	Russell C. Jackson (Rusty)	#review-18610 Fixed a bug with check_journalnum where it was being called to check the offline journal number, but the function was hard coded to JOURNALNUM. Implemented a function to compare the journal numbers of P4ROOT and OFFLINE_DB before switching the db files as an extra layer of protection to avoid data loss.
#24	18595	Russell C. Jackson (Rusty)	Fixed a log rotation bug that has been around for a long time. If you rotated the journal more times than KEEPCKPS and KEEPLOGS, the old method would remove all of your logs and checkpoints because it didn't actually look at how many were on disk. Found the bug while reviewing the test harness with Robert. Adjusted the test harness to account for the change. (Stole from Robert's shelf.)
#23	18590	Robert Cowham	Fix failing tests. Change log filename format to use - instead of : as separator for date/time component
#22	18587	Russell C. Jackson (Rusty)	Reworked the log rotation stuff in backup_functions.sh to make it cleaner and handle the new log from recreate_offline_db.sh. Modified recreate_offline_db.sh to add comments about a bad checkpoint. Also made it create its own log file since it isn't doing a checkpoint. Removed the log rotation for the same reason. Moved the LOGFILE setting out to all of scripts to make it more obvious for future scripts that you need to set that variable in your script so that it doesn't just default to checkpoint.log. Moved the functions in weekly_backup.sh and recreate_offline_db.sh into backup_functions.sh where they belong for consistency. Modified backup_functions.sh to use a consistent naming convention for all the rotated log files rather than checkpoint.log being unique. Replaced all back ticks with the newer bash $() method. Removed all of the line wrapping since I am pretty sure that none of us are working on an 80 character terminal these days and it is easier to read this way.
#21	18533	Robert Cowham	Put a date/time suffix onto checkpoint.log.* files in case of any errors to avoid them being overwritten. Make remove_old_logs tidy up appropriately.
#20	18532	Robert Cowham	Correct log message regarding journals replays
#19	18484	Russell C. Jackson (Rusty)	Added comment on WAITCOUNT to explain the value.
#18	18450	Russell C. Jackson (Rusty)	Added a kill for the p4d_stop function in case p4d doesn't shut down. In the process of testing this, I discovered that using $P4DBIN in this case was a bug that didn't work when running in case insensitive mode because the executable doesn't match what is actually running since we end up calling p4d from /p4/common/bin. Corrected the grep so that it would match in either case. #review-18430
#17	16335	C. Thomas Tyler	Routine Merge Down to dev from main using: p4 merge -b perforce_software-sdp-dev
#16	16029	C. Thomas Tyler	Routine merge to dev from main using: p4 merge -b perforce_software-sdp-dev
#15	15797	C. Thomas Tyler	Routine Merge Down to dev from main for SDP.
#14	15778	C. Thomas Tyler	Routine Merge Down to dev from main.
#13	15376	adrian_waters	formatting only - fix spacing; there's inconsistent use of tabs/spaces throughout the file - needs cleanup at some point.
#12	15375	adrian_waters	Routine merge-down from main->dev
#11	15374	adrian_waters	- Ensure backup scripts are run as the OSUSER (to prevent accidental running as root); - in scripts where LOGFILE value is changed from the 'checkpoint.log' set by set_vars, ensure the new assignment is before check_dirs is called, otherwise errors could be written to the 'wrong' log - in 'die()' - detect if running from terminal & also send output to stderr
#10	13931	C. Thomas Tyler	Routine merge-down to dev from main.
#9	13906	C. Thomas Tyler	Normalized P4INSTANCE to SDP_INSTANCE to get Unix/Windows implementations in sync. Reasons: 1. Things that interact with SDP in both Unix and Windows environments shouldn't have to account for this obscure SDP difference between Unix and Windows. (I came across this doing CBD work). 2. The Windows and Unix scripts have different variable names for defining the same concept, the SDP instance. Unix uses P4INSTANCE, while Windows uses SDP_INSTANCE. 3. This instance tag, a data set identifier, is an SDP concept. I prefer the SDP_INSTANCE name over P4INSTANCE, so I propose to normalize to SDP_INSTANCE. 4. The P4INSTANCE name makes it look like a setting that might be recognized by the p4d itself, which it is not. (There are other such things such as P4SERVER that could perhaps be renamed as a separate task; but I'm not sure we want to totally disallow the P4 prefix for variable names. It looks too right to be wrong in some cases, like P4BIN and P4DBIN. That's a discussion for another day, outside the scope of this task). Meanwhile: * Fixed a bug in the Windows 2013.3 upgrade script that was referencing undefined P4INSTANCE, as the Windows environment defined only SDP_INSTANCE. * Had P4INSTANCE been removed completely, this change would likely cause trouble for users doing updates for existing SDP installations. So, though it involves slight technical debt, I opted to keep a redundant definition of P4INSTANCE in p4_vars.template, with comments indicating SDP_INSTANCE should be used in favor of P4INSTANCE, with a warning that P4INSTANCE may go away in a future release. This should avoid unnecessary upgrade pain. * In mkdirs.sh, the variable name was INSTANCE rather than SDP_INSTANCE. I changed that as well. That required manual change rather than sub/replace to avoid corrupting other similar variable names (e.g. MASTERINSTANCE). This is a trivial change technically (a substitute/replace, plus tweaks in p4_vars.template), but impacts many files.
#8	12169	Russell C. Jackson (Rusty)	Updated copyright date to 2015 Updated shell scripts to require an instance parameter to eliminate the need for calling p4master_run. Python and Perl still need it since you have to set the environment for them to run in. Incorporated comments from reviewers. Left the . instead of source as that seems more common in the field and has the same functionality.
#7	12028	C. Thomas Tyler	Refreshed SDP dev branch, merging down from main.
#6	11541	Russell C. Jackson (Rusty)	Keeping dev up to date.
#5	11535	Russell C. Jackson (Rusty)	Updated dev from main.
#4	11509	Russell C. Jackson (Rusty)	Added sync_replica.log to backup function log rotations, and added rm on existing gzipped logs with the same name in order to keep the script from hanging waiting for a response to overwrite. Added sync_shared_replica.sh and weekly_sync_shared_replica.sh to support replicas with shared depotdata storage. No rsync is necessary. The logs volume must not be a shared volume with these scripts though.
#3	11483	Russell C. Jackson (Rusty)	Brought over changes from RCJ backup_functions.sh
#2	11463	Russell C. Jackson (Rusty)	Updated dev to prepare for Summit agreed changes.
#1	10638	C. Thomas Tyler	Populate perforce_software-sdp-dev.
//guest/perforce_software/sdp/main/Server/Unix/p4/common/bin/backup_functions.sh
#1	10148	C. Thomas Tyler	Promoted the Perforce Server Deployment Package to The Workshop.