#!/bin/bash
set -u
#==============================================================================
# Declarations and Environment
# --- Script identity ---------------------------------------------------------
declare ThisScript="${0##*/}"
declare CmdLine="$0 $*"
declare Version="1.2.0"
declare ThisUser=""
declare ThisHost=""

# --- SDP environment ---------------------------------------------------------
declare SDPEnvFile="/p4/common/bin/p4_vars"
declare SDPHostsCfg="/p4/common/site/config/sdp_hosts.cfg"
declare SDPInstance=""

# --- Host selection (populated from the command line) -------------------------
declare HostList=""
declare HostList2=""
declare ExcludeHostList=""
declare -i Excluded=0

# --- Per-run bookkeeping -----------------------------------------------------
# HostStatus maps a host name to its result text for the final report.
declare -A HostStatus
declare -i HostCount=0
declare -i HostProcessedCount=0
declare -i OKCount=0
declare -i ErrorCount=0

# --- Behavior switches and command to run ------------------------------------
declare CmdAndArgs=""
declare Log=""
declare -i Verbose=1
declare -i Debug=0
declare -i NoOp=0

# --- Output separators -------------------------------------------------------
declare H1="=============================================================================="
declare H2="------------------------------------------------------------------------------"
#==============================================================================
# Local Functions
# Print all arguments as one line, interpreting backslash escapes (echo -e).
function msg ()
{
   declare text="$*"
   echo -e "$text"
}
# Print a message prefixed with "DEBUG: " via msg, but only when $Debug is
# non-zero; otherwise do nothing.
function dbg ()
{
   if [[ "$Debug" -ne 0 ]]; then
      msg "DEBUG: $*"
   fi
}
# Display an error message ($1, default "Unknown Error") via msg and add one
# to the global $ErrorCount.
function errmsg ()
{
   declare text="${1:-Unknown Error}"
   msg "\\nError: ${text}\\n"
   ErrorCount+=1
}
# Report a fatal error ($1, default "Unknown Error") via errmsg, then exit the
# script using the current $ErrorCount as the exit status.
function bail ()
{
   declare reason="${1:-Unknown Error}"
   errmsg "$reason"
   exit "$ErrorCount"
}
#------------------------------------------------------------------------------
# Function: terminate
#
# Trap handler (registered for EXIT, SIGINT and SIGTERM) that ends the script.
# It removes the traps so it cannot re-enter itself, reminds the user where
# the log file is (when logging is enabled and the log exists), and exits
# with $ErrorCount as the exit status.
#
# Exit statuses are only 8 bits wide, so a raw 'exit 256' would be seen by the
# caller as 0 (success). The status is therefore capped at 255.
#
# NOTE(review): when invoked by SIGINT/SIGTERM with no errors recorded yet,
# this exits 0 - confirm that is the intended status for an interrupted run.
#------------------------------------------------------------------------------
function terminate
{
   declare -i exitCode=0

   # Disable signal trapping so the 'exit' below does not re-trigger this.
   trap - EXIT SIGINT SIGTERM

   # Tell the user where the log is.
   [[ "$Log" != off && -f "$Log" ]] && msg "\\nLog is: $Log\\n${H1}"

   # With the trap removed, exit. Cap the status so it cannot wrap to 0.
   exitCode=$ErrorCount
   [[ "$exitCode" -gt 255 ]] && exitCode=255
   exit "$exitCode"
}
#------------------------------------------------------------------------------
# Function: usage (required function)
#
# Display the usage message (optionally preceded by an error message), then
# exit the script. This function never returns: it always exits with status 1,
# including for plain '-h' and '-man' help requests.
#
# Input:
# $1 - style, either -h (for short form) or -man (for man-page like format).
# The default is -h.
#
# $2 - error message (optional). Specify this if usage() is called due to
# user error, in which case the given message is displayed first, followed by
# the standard usage message (short or long depending on $1). If displaying an
# error, usually $1 should be -h so that the longer usage message doesn't
# obscure the error message.
#
# Sample Usage:
# usage
# usage -h
# usage -man
# usage -h "Incorrect command line usage."
#------------------------------------------------------------------------------
function usage
{
   declare style=${1:--h}
   declare errorMessage=${2:-Unset}

   if [[ "$errorMessage" != Unset ]]; then
      msg "\\n\\nUsage Error:\\n\\n$errorMessage\\n\\n"
   fi

   # Short synopsis; always shown.
   msg "USAGE for $ThisScript v$Version:
$ThisScript {-a [-e <host1>[,<host2>...]] | -r [-e <host1>[,<host2>...]] | -H <host1>[,<host2>...]} [-i <instance>] [-q] [-n] [-L <log>] [-si] [-d|-D]
[-c cmd and args ...]
or
$ThisScript [-h|-man|-V]
"

   # Long form; shown only for '-man'.
   if [[ $style == -man ]]; then
      echo -e "
DESCRIPTION:
This script executes a command on multiple SDP hosts.
OPTIONS:
-c Specify the command with options/arguments/flags to run on remote hosts.
After the '-c' flag occurs on the command line, the remainder of the
command line is interpreted as part of the command, not as options to
this script. Therefore, '-c' must always be specified after all other
options to this script.
If '-c' is not specified, the 'hostname' command is executed. This can be
useful for exercising and verifying SSH access to all machines.
Quoting what comes after '-c' is necessary if the command to run on
other hosts uses '&&', '||', or ';' to separate multi-part commands.
-a Specify the command is to be run on all SDP hosts, per
the ALL_SDP_HOSTS setting in $SDPHostsCfg.
-r Specify the command is to be run on all replica hosts, per
the SDP_REPLICA_HOSTS setting in $SDPHostsCfg.
-e <host1>[,<host2>...]
Specify an explicit, comma-delimited list of hosts to exclude from
the list of hosts to run the command on; use this with '-a' or '-r'.
-H <host1>[,<host2>...]
Specify an explicit, comma-delimited list of hosts to execute
the command on.
-i <instance>
Specify the SDP instance whose shell environment is loaded before
running. If omitted, the value of the SDP_INSTANCE environment variable
is used if set, otherwise 'hms'.
-q Specify '-q' (quiet mode) to suppress displaying the output of the
executed command. With '-q', only the success or failure (with exit
code) of the executed command is reported.
-n No-Operation mode. In this mode, commands are displayed instead of being executed.
-L <log>
Specify the path to a log file, or the special value 'off' to disable
logging. By default, all output (stdout and stderr) goes to
/tmp/${ThisScript%.sh}.<datestamp>.log
NOTE: This script is self-logging. That is, output displayed on the screen
is simultaneously captured in the log file. Using redirection operators like
'> log' and '2>&1' and 'tee' are unnecessary (but harmless).
-si Operate silently. All output (stdout and stderr) is redirected to the log
only; no output appears on the terminal. This cannot be used with
'-L off'.
-d Enable debug messages.
-D Set extreme debugging verbosity using bash 'set -x' mode. Implies '-d'.
HELP OPTIONS:
-h Display short help message
-man Display man-style help message
-V Display version info for this script.
FILES:
This script uses the SDP hosts config file to define the list of all hosts
and all replica hosts: $SDPHostsCfg
EXAMPLES:
EXAMPLE 1: Check the p4d version running on all hosts:
hrun -a -c p4 -ztag -F %serverVersion% info -s
EXAMPLE 2: Check the p4d version running on a list of hosts:
hrun -H bos-helix-01,bos-helix-02,nyc-helix-03 -c p4 -ztag -F %serverVersion% info
EXAMPLE 3: Check the p4d version staged on all hosts. Note usage of quoting:
hrun -a -c \"/p4/sdp/helix_binaries/p4d -V | grep ^Rev\"
EXAMPLE 4: Check replica health (replicas only). Note usage of quotes:
hrun -r -c \"p4 pull -ljv; p4 pull -ls\"
EXAMPLE 5: Check replica health (replicas only), skipping one. Note usage of
quotes:
hrun -r -e bos-helix-01 -c \"p4 pull -ljv; p4 pull -ls\"
EXAMPLE 6: Check server.id on all hosts:
hrun -a -c cat /p4/1/root/server.id
EXAMPLE 7: Check for an IP in /etc/hosts, quiet mode.
hrun -a -q -c grep 10.0.0.4 /etc/hosts
"
   fi

   exit 1
}
#------------------------------------------------------------------------------
# Function: run
#
# Short: Run a command locally with optional description, honoring $NoOp.
#
# Input:
# $1 - cmd. The command to run with any arguments to the command.
# $2 - desc. Optional text description of command to run.
# $3 - honorNoOpFlag. Pass in 1 to mean "Yes, honor the $NoOp setting
# and display (but don't run) commands if $NoOp is set." Otherwise
# $NoOp is ignored, and the command is always run.
# This parameter is optional; the default value is 1.
# $4 - alwaysShowOutputFlag. If set to 1, show the command output regardless
# of the $Verbose and $Debug settings.
# This parameter is optional; the default value is 0.
# $5 - grepString. If set, this changes the exit code behavior.
# If the specified string exists in the output, a 0 is returned, else 1.
# Strings suitable for use with 'grep -E' are allowed.
#
# Description:
# Display an optional description of a command, then display the actual
# command to execute, and then run the command. The command text is written
# verbatim to a temporary bash script under ${P4TMP:-/tmp}, which is executed
# with stdout and stderr captured to a temporary output file. Both temporary
# files are removed before returning. If $NoOp is set and $honorNoOpFlag is 1,
# the command is shown but not run, and no temporary files are created.
#
# The variables CMDLAST and CMDEXITCODE are exported each time run() is called.
# CMDLAST contains the last command given. CMDEXITCODE contains the exit code
# of the command, or the exit code of 'grep -E' if a grepString was given.
#
# Output is shown if $alwaysShowOutputFlag is 1, $Verbose is 1, or $Debug is
# not 0.
#
# Returns:
# 0 in NO-OP mode; 1 if the temporary script cannot be created; otherwise the
# same value stored in CMDEXITCODE.
#------------------------------------------------------------------------------
function run () {
   dbg "CALL: run ($*)"
   declare cmd=${1:-}
   declare desc=${2:-}
   declare -i honorNoOpFlag=${3:-1}
   declare -i alwaysShowOutputFlag=${4:-0}
   declare grepString=${5:-}
   declare cmdScript=
   declare cmdOut=
   declare -i exitCode=0

   export CMDLAST="$cmd"
   export CMDEXITCODE=0

   [[ -n "$desc" ]] && msg "$desc"

   # Preview only: return before any temp file is created, so none is leaked.
   if [[ "$honorNoOpFlag" -eq 1 && "$NoOp" -eq 1 ]]; then
      msg "NO-OP: Would run: \"$cmd\"\\n"
      return 0
   fi

   msg "Running: \"$cmd\""

   if ! cmdScript="$(mktemp "${P4TMP:-/tmp}/cmd.XXXXXX.sh")"; then
      export CMDEXITCODE=1
      errmsg "run(): Failed to create temp command script."
      return 1
   fi
   cmdOut="${cmdScript%.sh}.out"

   # Use printf '%s' rather than 'echo -e' so backslash sequences inside the
   # command (e.g. '\t' in a printf format or regex) reach the script intact.
   printf '#!/bin/bash\n%s\n\n' "$cmd" > "$cmdScript"
   chmod +x "$cmdScript"

   "$cmdScript" > "$cmdOut" 2>&1
   exitCode=$?

   # When a grep string is given, the grep result replaces the command's
   # own exit code.
   if [[ -n "$grepString" ]]; then
      # shellcheck disable=SC2196
      grep -E "$grepString" "$cmdOut" > /dev/null 2>&1
      exitCode=$?
   fi
   export CMDEXITCODE="$exitCode"

   if [[ "$alwaysShowOutputFlag" -eq 1 || "$Verbose" -eq 1 || "$Debug" -ne 0 ]]; then
      cat "$cmdOut"
   fi

   # Be clean and tidy.
   /bin/rm -f "$cmdScript" "$cmdOut"

   return "$exitCode"
}
#------------------------------------------------------------------------------
# Function: rrun
#
# Short: Run a command on a remote host with optional description, honoring
# the $NoOp setting.
#
# Input:
# $1 - host. The remote host to run the command on.
# $2 - cmd. The command to run with any arguments to the command.
# $3 - desc. Optional text description of command to run.
# $4 - honorNoOpFlag. Pass in 1 to mean "Yes, honor the $NoOp setting
# and display (but don't run) commands if $NoOp is set." Otherwise
# $NoOp is ignored, and the command is always run.
# This parameter is optional; the default value is 1.
# $5 - alwaysShowOutputFlag. If set to 1, show the command output regardless
# of the $Verbose and $Debug settings.
# This parameter is optional; the default value is 0.
# $6 - grepString. If set, this changes the exit code behavior.
# If the specified string exists in the output, a 0 is returned, else 1.
# Strings suitable for use with 'grep -E' are allowed.
#
# Description:
# Display an optional description of a command to run via ssh call to
# another host, then display the actual command to execute, and then run the
# command. The command text is written verbatim to a temporary bash script
# under ${P4TMP:-/tmp}, copied with scp to the same directory on the remote
# host, and executed there with ssh; stdout and stderr are captured locally.
# The local temporary files are always removed, and removal of the remote
# copy is attempted (best effort) after the command has run. If $NoOp is set
# and $honorNoOpFlag is 1, the command is shown but not run, and no temporary
# files are created.
#
# The variables RCMDLAST and RCMDEXITCODE are exported each time rrun() is
# called. RCMDLAST contains the last command given. RCMDEXITCODE contains the
# exit status of the ssh call, or the exit code of 'grep -E' if a grepString
# was given, or -1 if the script could not be copied to the remote host.
#
# Output is shown if $alwaysShowOutputFlag is 1, $Verbose is 1, or $Debug is
# not 0.
#
# Returns:
# 0 in NO-OP mode; 1 if the temporary script cannot be created or copied;
# otherwise the same value stored in RCMDEXITCODE.
#------------------------------------------------------------------------------
function rrun () {
   dbg "CALL: rrun ($*)"
   declare host=${1:-Unset}
   declare cmd=${2:-Unset}
   declare desc=${3:-}
   declare -i honorNoOpFlag=${4:-1}
   declare -i alwaysShowOutputFlag=${5:-0}
   declare grepString=${6:-}
   declare rCmdScript=
   declare rCmdOut=
   declare -i exitCode=0

   export RCMDLAST="$cmd"
   export RCMDEXITCODE=0

   [[ -n "$desc" ]] && msg "$desc"

   # Preview only: return before any temp file is created, so none is leaked.
   if [[ "$honorNoOpFlag" -eq 1 && "$NoOp" -eq 1 ]]; then
      msg "NO-OP: Would run: \"$cmd\" on host $host.\\n"
      return 0
   fi

   msg "Running: \"$cmd\" on host $host."

   if ! rCmdScript="$(mktemp "${P4TMP:-/tmp}/rcmd.XXXXXX.sh")"; then
      export RCMDEXITCODE=-1
      errmsg "rrun(): Failed to create temp command script."
      return 1
   fi
   rCmdOut="${rCmdScript%.sh}.out"

   # Use printf '%s' rather than 'echo -e' so backslash sequences inside the
   # command (e.g. '\t' in a printf format or regex) reach the script intact.
   printf '#!/bin/bash\n%s\n\n' "$cmd" > "$rCmdScript"
   chmod +wx "$rCmdScript"

   if ! scp -pq "$rCmdScript" "$host:${P4TMP:-/tmp}/."; then
      export RCMDEXITCODE=-1
      # Do not leave the local temp script behind on this early return.
      /bin/rm -f "$rCmdScript"
      errmsg "rrun(): Failed to copy temp command script to $host."
      return 1
   fi

   ssh -q -n "$host" "$rCmdScript" > "$rCmdOut" 2>&1
   exitCode=$?

   # Best-effort removal of the script copied to the remote host. A failure
   # here must not affect the reported result of the command itself.
   ssh -q -n "$host" "/bin/rm -f '$rCmdScript'" > /dev/null 2>&1

   # When a grep string is given, the grep result replaces the remote
   # command's own exit code.
   if [[ -n "$grepString" ]]; then
      # shellcheck disable=SC2196
      grep -E "$grepString" "$rCmdOut" > /dev/null 2>&1
      exitCode=$?
   fi
   export RCMDEXITCODE="$exitCode"

   if [[ "$alwaysShowOutputFlag" -eq 1 || "$Verbose" -eq 1 || "$Debug" -ne 0 ]]; then
      cat "$rCmdOut"
   fi

   # Be clean and tidy.
   /bin/rm -f "$rCmdScript" "$rCmdOut"

   return "$exitCode"
}
#==============================================================================
# Command Line Processing
# Parse the command line one option at a time. Options that take a value read
# it from $2 and set shiftArgs=1 so the value is consumed after the option.
# shiftArgs: number of EXTRA positional parameters to drop after the option.
declare -i shiftArgs=0
# SilentMode: set to 1 by '-si'.
declare -i SilentMode=0
# Relax 'set -u' while parsing: an option given without its value makes $2
# unset, which must reach the "Incorrect number of arguments" check below
# rather than abort on an unbound variable.
set +u
while [[ $# -gt 0 ]]; do
case $1 in
# Help and version options exit immediately.
(-h) usage -h;;
(-man) usage -man;;
(-V) msg "$ThisScript version $Version"; exit 0;;
# Options that take a value.
(-i) SDPInstance="$2"; shiftArgs=1;;
(-H) HostList="$2"; shiftArgs=1;;
(-e) ExcludeHostList="$2"; shiftArgs=1;;
# '-a' and '-r' store placeholder values in HostList.
(-a) HostList=ALL;;
(-r) HostList=ALL_REPLICAS;;
(-q) Verbose=0;;
(-L) Log="$2"; shiftArgs=1;;
(-si) SilentMode=1;;
(-n) NoOp=1;;
(-d) Debug=1;;
(-D) Debug=1; set -x;; # Extreme debug; use bash 'set -x' mode.
# '-c' takes everything remaining on the command line as the command. After
# dropping '-c' itself, the remaining words are joined into one string, and
# shiftArgs is set to that word count minus one because the unconditional
# 'shift' below removes the first of them.
(-c) shift; CmdAndArgs="$*"; shiftArgs=$#; shiftArgs=$((shiftArgs-1));;
(-*) usage -h "Unknown option [$1].";;
(*) usage -h "Unknown argument [$1].";;
esac
# Shift (modify $#) the appropriate number of times: once for the option
# itself, then shiftArgs more times for its value(s). Running out of
# parameters here means an option was missing its value.
shift; while [[ $shiftArgs -gt 0 ]]; do
[[ $# -eq 0 ]] && usage -h "Incorrect number of arguments."
# shiftArgs has the integer attribute, so this string is evaluated
# arithmetically (a decrement).
shiftArgs=$shiftArgs-1
shift
done
done
# Restore strict unset-variable checking.
set -u
#==============================================================================
# Command Line Validation
# '-si' sends all output to the log only, so it cannot be combined with
# '-L off' (no log); the usage text documents this restriction.
if [[ "$SilentMode" -eq 1 && "$Log" == off ]]; then
   usage -h "The '-si' option cannot be used with '-L off'."
fi

# Instance precedence: '-i' value, then $SDP_INSTANCE, then 'hms'.
[[ -n "$SDPInstance" ]] || SDPInstance="${SDP_INSTANCE:-hms}"

# Load the SDP shell environment for the selected instance.
# shellcheck disable=SC1090
source "$SDPEnvFile" "$SDPInstance" ||\
   bail "Failed to load SDP shell environment for instance $SDPInstance."

# Load the host list definitions (ALL_SDP_HOSTS and SDP_REPLICA_HOSTS are
# read from the environment afterwards).
# shellcheck disable=SC1090
source "$SDPHostsCfg" ||\
   bail "Failed to load sdp_hosts.cfg file."

# Default log file: script name without '.sh', plus a timestamp.
[[ -n "$Log" ]] || Log="/tmp/${ThisScript%.sh}.$(date +'%Y%m%d-%H%M%S').log"
#==============================================================================
# Main Program
# From here on, every exit path goes through terminate().
trap terminate EXIT SIGINT SIGTERM

ThisUser=$(whoami)
ThisHost=${HOSTNAME%%.*}

# With no '-c', run 'hostname' (useful as an SSH connectivity check).
[[ -z "$CmdAndArgs" ]] && CmdAndArgs="hostname"

# Replace the '-a' / '-r' placeholders with the configured host lists.
if [[ "$HostList" == ALL ]]; then
   HostList="${ALL_SDP_HOSTS:-}"
elif [[ "$HostList" == ALL_REPLICAS ]]; then
   HostList="${SDP_REPLICA_HOSTS:-}"
fi

# Convert the comma-delimited list into a space-delimited one, so it can be
# iterated with plain word splitting.
if [[ -n "$HostList" ]]; then
   HostList=${HostList//,/ }
else
   bail "No target hosts specified. Specify '-a', '-r', or '-H <host1>[,<host2>...]'."
fi

# Apply '-e': keep only the hosts that match no entry in the exclusion list.
if [[ -n "$ExcludeHostList" ]]; then
   ExcludeHostList=${ExcludeHostList//,/ }
   HostList2=
   for host in $HostList; do
      Excluded=0
      for ehost in $ExcludeHostList; do
         [[ "$host" == "$ehost" ]] && Excluded=1
      done
      [[ "$Excluded" -eq 0 ]] && HostList2+=" $host"
   done

   # HostList2 was built with one leading space per host; drop the first.
   HostList=${HostList2# }

   # Excluding every host would otherwise run nothing and report success.
   [[ -n "$HostList" ]] ||\
      bail "No target hosts remain after applying the '-e' exclusion list."
fi
# Self-logging: unless the log is 'off', send stdout and stderr to the log
# file, either exclusively (silent mode) or duplicated to the terminal via tee.
if [[ "$Log" != off ]]; then
   touch "$Log" || bail "Couldn't touch log file: $Log"

   if [[ "$SilentMode" -ne 0 ]]; then
      # Silent mode: log only.
      exec >"$Log"
   else
      # Normal mode: terminal and log.
      exec > >(tee "$Log")
   fi
   # In both modes stderr follows stdout.
   exec 2>&1

   msg "${H1}\\nLog is: $Log"
fi
dbg "Starting $ThisScript v$Version as $ThisUser@${ThisHost} on $(date) with command line:\\n$CmdLine"
msg "User: $ThisUser\\nLaunch Host: $ThisHost\\nCommand and Args:\\n\\t$CmdAndArgs\\n"
msg "Host List: $HostList"

if [[ "$NoOp" -eq 1 ]]; then
   msg "\\nNoOp (No Operation/preview) mode enabled."
fi

# Number of hosts on which the command failed. This is tracked separately
# from $ErrorCount, which counts error messages and can be incremented more
# than once for a single host (e.g. once inside rrun and once below).
declare -i FailCount=0

# First pass: count the hosts and give each an initial status, so the final
# report lists every host.
for host in $HostList; do
   HostCount+=1
   HostStatus[$host]="Not Attempted."
done

# Second pass: run the command on each host in turn and record the result.
for host in $HostList; do
   HostProcessedCount+=1
   msg "${H2}"
   if rrun "$host" "$CmdAndArgs"; then
      msg "Command OK on host $host."
      OKCount+=1
      HostStatus[$host]="OK"
   else
      errmsg "Command returned exit code '$RCMDEXITCODE' on host '$host'."
      FailCount+=1
      HostStatus[$host]="FAIL"
   fi
done

msg "\\nSummary:"
if [[ "$FailCount" -eq 0 ]]; then
   msg "Command executed successfully on all $HostProcessedCount hosts."
else
   msg "Command executed on $HostProcessedCount of $HostCount hosts, OK on $OKCount, $FailCount with errors."
fi

msg "\\nStatus on each host:"
for host in $HostList; do
   printf " %-16s %-s\n" "$host" "${HostStatus[$host]}"
done

# SECONDS is bash's count of seconds since the shell started.
msg "That took $((SECONDS/3600)) hours $((SECONDS%3600/60)) minutes $((SECONDS%60)) seconds.\\n"