# hms_actions.sh Version=1.0.35
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

#==============================================================================
# HMS Library Functions.
#
# These functions call helpers that are not defined in this file (msg, vmsg,
# vvmsg, warnmsg, errmsg, run, runRemoteCmd) and read associative arrays
# (ComponentType, ComponentMasterHost, ComponentURL, ComponentManaged, ...)
# that must be populated by the caller before use.

#------------------------------------------------------------------------------
# change_component_status (start/stop, $component, $tag)
# $1 - start or stop
# $2 - fully qualified component name (<instance>:<component>, e.g.
#      1:p4broker01).
# $3 - optional tag, used only for brokers, e.g. "dfm"
#
# Runs the component's init script on its master host via runRemoteCmd.
# Returns 0 if the remote command reported success (RCMDEXITCODE is 0),
# otherwise 1.
#------------------------------------------------------------------------------
function change_component_status () {
   vvmsg "CALL: change_component_status ($*)"
   declare command=${1:-Unset}
   declare fqComponent=${2:-Unset}
   declare tag=${3:-""}
   declare componentType=
   declare componentCommand=
   declare componentMasterHost=
   declare instance=
   declare initScript=

   if [[ $fqComponent != *":"* ]]; then
      errmsg "Unable to start/stop entire instance yet; this will be implemented later."
      return 1
   fi

   if [[ ! $command =~ ^(start|stop)$ ]]; then
      errmsg "Unrecognized start/stop command [$command]."
      return 1
   fi

   instance=${fqComponent%%:*}
   componentType=${ComponentType[$fqComponent]:-Unset}
   componentMasterHost=${ComponentMasterHost[$fqComponent]:-Unset}

   case "$componentType" in
      (p4d-*) initScript="/p4/$instance/bin/p4d_${instance}_init";;
      (p4broker) initScript="/p4/$instance/bin/p4broker_${instance}_init";;
      (p4p) initScript="/p4/$instance/bin/p4p_${instance}_init";;
      (*)
         errmsg "Don't know how to $command component [$fqComponent] of type [$componentType]."
         return 1
      ;;
   esac

   if [[ $componentMasterHost == Unset ]]; then
      errmsg "No master host configured for component $fqComponent."
      return 1
   fi

   # The tag is appended only when given, so the remote command line carries
   # no trailing blank.
   componentCommand="$initScript $command"
   [[ -n $tag ]] && componentCommand+=" $tag"

   runRemoteCmd "$componentMasterHost" "$componentCommand" \
      "Executing $command on $componentType $fqComponent on $componentMasterHost" 1 1 0

   # runRemoteCmd reports the remote exit status through RCMDEXITCODE.
   [[ $RCMDEXITCODE -eq 0 ]] || return 1

   return 0
}

#------------------------------------------------------------------------------
# Function: hms_run_on_replicas ($instance, $p4Command, $label)
#
# Private helper shared by get_replica_status() and get_diskspace().
#
# $1 - SDP instance.
# $2 - p4 command (with arguments) to run against each server, e.g. "pull -lj".
# $3 - short label used in messages, e.g. "replica status".
#
# Runs the command against every managed component of the instance whose type
# is p4d-e or p4d-r. Returns 0 only if every invocation succeeded.
#------------------------------------------------------------------------------
function hms_run_on_replicas () {
   declare instance=${1:-Unset}
   declare p4Command=${2:-Unset}
   declare label=${3:-Unset}
   declare componentList=
   declare fqComponent=
   declare componentType=
   declare componentMasterHost=
   declare componentURL=
   declare componentManaged=
   declare componentP4PORT=
   declare componentStatusCmd=
   declare -i summaryExitCode=0

   [[ $instance == Unset || $p4Command == Unset || $label == Unset ]] && return 1

   msg "Getting list of managed components for instance $instance."
   if ! componentList=$(get_component_list "$instance" 1); then
      errmsg "Could not get a list of valid components for instance $instance. Aborting $label check."
      return 1
   fi

   # Load the SDP environment for the instance; $P4BIN used below is expected
   # to come from it.
   source /p4/common/bin/p4_vars "$instance"

   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      componentMasterHost=${ComponentMasterHost[$fqComponent]:-Unset}
      componentURL=${ComponentURL[$fqComponent]:-Unset}
      componentManaged=${ComponentManaged[$fqComponent]:-Unset}

      vvmsg "Type=$componentType Master=$componentMasterHost URL=$componentURL managed=$componentManaged"

      # Only edge servers and replicas are checked.
      [[ $componentType == p4d-e || $componentType == p4d-r ]] || continue

      # For SSL the P4PORT form is ssl:<host>:<port>; the "ssl:" prefix of
      # the configured URL moves in front of the host name.
      if [[ $componentURL == "ssl:"* ]]; then
         componentP4PORT="ssl:$componentMasterHost:${componentURL#ssl:}"
      else
         componentP4PORT="$componentMasterHost:$componentURL"
      fi

      componentStatusCmd="$P4BIN -s -p $componentP4PORT $p4Command"
      run "$componentStatusCmd" \
         "3:Checking $label for $fqComponent .|4:Checking with command: $componentStatusCmd" 1 1 ||\
         summaryExitCode=1
   done

   return $summaryExitCode
}

#------------------------------------------------------------------------------
# Function: get_replica_status ($instance)
#
# Runs 'p4 pull -lj' against every managed edge/replica of the instance.
# Returns 0 if all checks succeeded, 1 otherwise.
#------------------------------------------------------------------------------
function get_replica_status () {
   vvmsg "CALL: get_replica_status ($*)"
   declare instance=${1:-Unset}

   [[ $instance == Unset ]] && return 1

   hms_run_on_replicas "$instance" "pull -lj" "replica status"
}

#------------------------------------------------------------------------------
# Function: get_diskspace ($instance)
#
# Runs 'p4 df' against every managed edge/replica of the instance.
# Returns 0 if all checks succeeded, 1 otherwise.
#------------------------------------------------------------------------------
function get_diskspace () {
   vvmsg "CALL: get_diskspace ($*)"
   declare instance=${1:-Unset}

   [[ $instance == Unset ]] && return 1

   hms_run_on_replicas "$instance" "df" "diskspace"
}

#------------------------------------------------------------------------------
# Function: down_for_maintenance ($subCommand, $instance)
# $1 - on, off, or status
# $2 - Instance
#
# Returns 0 on success. For on/off, returns 1 if any broker failed to start in
# the requested configuration. For status, returns the last non-zero code
# reported by get_component_status (e.g. 2 when the dfm broker is offline).
#------------------------------------------------------------------------------
function down_for_maintenance () {
   vvmsg "CALL: down_for_maintenance ($*)"
   declare subCommand=${1:-Unset}
   declare instance=${2:-Unset}
   declare componentList=
   declare fqComponent=
   declare componentType=
   declare -i summaryExitCode=0

   [[ $instance == Unset || $subCommand == Unset ]] && return 1

   # Reject a bad subcommand up front rather than only once a broker is found.
   if [[ ! $subCommand =~ ^(on|off|status)$ ]]; then
      errmsg "Unknown dfm subcommand [$subCommand]."
      return 1
   fi

   msg "Getting list of managed components for instance $instance."
   if ! componentList=$(get_component_list "$instance" 1); then
      errmsg "Could not get a list of valid components for instance $instance. Aborting dfm."
      return 1
   fi

   # Put all brokers into Down For Maintenance mode (on), or bring
   # them out (off), or check on them (status).
   case "$subCommand" in
      (on) msg "Setting Down For Maintenance message on all available brokers.";;
      (off) msg "Clearing Down For Maintenance message on all available brokers.";;
      (status) msg "Checking Down For Maintenance status on all available brokers.";;
   esac

   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      [[ $componentType == p4broker ]] || continue

      case "$subCommand" in
         (on)
            # A failed stop is tolerated (the broker may already be down);
            # only a failed start is counted as an error.
            change_component_status stop "$fqComponent"
            change_component_status start "$fqComponent" dfm || summaryExitCode=1
         ;;
         (off)
            change_component_status stop "$fqComponent" dfm
            change_component_status start "$fqComponent" || summaryExitCode=1
         ;;
         (status)
            get_component_status "$fqComponent" 1 dfm || summaryExitCode=$?
         ;;
      esac
   done

   return $summaryExitCode
}

#------------------------------------------------------------------------------
# Function: execute_instance_failover ($instance, $path, $style)
#
# This function executes failover of all managed components for the instance.
#
# This is called by failover(), which does data verification. This routine
# can thus dispense with some of the "defensive programming" done in
# failover(), allowing us to assume our data model (implemented primarily
# with associative arrays) has what we need.
#
# Returns 0 if every attempted component failover succeeded, 1 otherwise.
#------------------------------------------------------------------------------
function execute_instance_failover () {
   vvmsg "CALL: execute_instance_failover ($*)"
   declare instance=${1:-Unset}
   declare path=${2:-Unset}
   declare style=${3:-Unset}
   declare componentList=
   declare componentType=
   declare fqComponent=
   declare -i summaryExitCode=0

   [[ $instance == Unset || $path == Unset || $style == Unset ]] && return 1

   msg "Getting list of managed components for instance $instance."
   if ! componentList=$(get_component_list "$instance" 1); then
      errmsg "Could not get a list of valid components for instance $instance. Aborting failover."
      return 1
   fi

   # Put all (available) brokers in Down For Maintenance mode.
   msg "Setting Down For Maintenance message on all available brokers."
   down_for_maintenance on "$instance"

   # NOTE(review): both loops below stop ('break') at the first component that
   # is not of the wanted type, so only a leading run of matching components
   # is processed. That looks like it may have been meant as 'continue', but
   # switching would hand the instance-level failover path to
   # failover_p4d_edge; confirm intent before changing.
   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      [[ $componentType == p4d-e ]] || break
      failover_p4d_edge "$path" "$fqComponent" "$style" ||\
         summaryExitCode=1
   done

   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      [[ $componentType == p4d-mc ]] || break
      failover_p4d_master "$path" "$fqComponent" "$style" ||\
         summaryExitCode=1
   done

   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      case "$componentType" in
         (p4p|p4dtg|p4web|p4gf|swarm)
            warnmsg "Failover for component type [$componentType] not implemented."
         ;;
         (Unset)
            errmsg "ComponentType not defined for component $fqComponent."
            summaryExitCode=1
         ;;
      esac
   done

   # Finally, bring all (available) brokers back online.
   down_for_maintenance off "$instance"

   return $summaryExitCode
}

#------------------------------------------------------------------------------
# Function: execute_edge_failover ($instance, $path, $style, $component)
#
# This function executes failover of an edge server.
#
# This is called by failover(), which does data verification. This routine
# can thus dispense with some of the "defensive programming" done in
# failover(), allowing us to assume our data model (implemenented primarily
# with associative arrays) has what we need.
#------------------------------------------------------------------------------
function execute_edge_failover () {
   vvmsg "CALL: execute_edge_failover ($*)"
   declare instance=${1:-Unset}
   declare path=${2:-Unset}
   declare style=${3:-Unset}
   declare component=${4:-Unset}
   declare fqComponent=

   [[ $instance == Unset || $path == Unset || $style == Unset || $component == Unset ]] && return 1

   fqComponent="${instance}:${component}"

   failover_p4d_edge "$path" "$fqComponent" "$style" || return 1

   return 0
}

#------------------------------------------------------------------------------
# Function: hms_failover_p4d ($path, $fqComponent, $style, $role, $sourceHost, $newServerID)
#
# Private helper implementing the steps shared by failover_p4d_edge() and
# failover_p4d_master().
#
# $1 - failover path name (second half of the <instance>:<path> key).
# $2 - fully qualified component name; the instance is taken from it.
# $3 - failover style, passed through to the remote scripts (Scheduled is
#      treated as "source server must shut down cleanly").
# $4 - role, 'edge' or 'master'. For 'edge' the recognized failover types are
#      EdgeLocal/EdgeMO/EdgeFull; otherwise Local/MO/Full.
# $5 - host currently running the server being failed over.
# $6 - ServerID the backup host should assume.
#
# Returns 0 on success, 1 on failure.
#------------------------------------------------------------------------------
function hms_failover_p4d () {
   declare path=${1:-Unset}
   declare fqComponent=${2:-Unset}
   declare style=${3:-Unset}
   declare role=${4:-Unset}
   declare sourceHost=${5:-Unset}
   declare newServerID=${6:-Unset}
   declare typePrefix=
   declare instance=
   declare fqPath=
   declare backupHost=
   declare failoverType=
   declare -i isLocal=0

   [[ $role == edge ]] && typePrefix=Edge

   instance=${fqComponent%%:*}
   fqPath=$instance:$path
   backupHost=${FailoverBackupHost[$fqPath]:-Unset}
   failoverType=${FailoverType[$fqPath]:-Unset}

   if [[ $failoverType == "${typePrefix}Local" ]]; then
      isLocal=1
   elif [[ $failoverType != "${typePrefix}MO" && $failoverType != "${typePrefix}Full" ]]; then
      errmsg "Unknown failover type ($failoverType), aborting failover of component $fqComponent."
      return 1
   fi

   if [[ $sourceHost == Unset ]]; then
      errmsg "No $role host configured for [$fqPath], aborting failover of component $fqComponent."
      return 1
   fi

   if [[ $isLocal -eq 1 ]]; then
      runRemoteCmd "$sourceHost" \
         "$HMS_SCRIPTS/failover_p4d_local.sh $instance $style $NO_OP" \
         "Failing over to offline dbs locally on $sourceHost. Running: failover_p4d_local.sh I=$instance S=$style NO_OP=$NO_OP" 1 1 0

      if [[ $RCMDEXITCODE -eq 0 ]]; then
         msg "Local Failover on $sourceHost completed OK."
         return 0
      fi

      errmsg "Local Failover on $sourceHost FAILED."
      return 1
   fi

   # Never shut down the source server unless there is somewhere to go.
   if [[ $backupHost == Unset ]]; then
      errmsg "No backup host configured for instance:path [$fqPath], aborting failover of component $fqComponent."
      return 1
   fi

   # The difference between Scheduled and Unscheduled failover is partly one
   # of expectations. In Unscheduled Failover, we don't expect the server
   # to respond. But in either case, we do our best to avoid split brain,
   # and try to shut it down.
   if [[ $style == Scheduled ]]; then
      msg "Shutting down $role server for Scheduled Failover. We expect it to shutdown cleanly."
   else
      msg "Attempting shutdown of $role server for Unscheduled Failover. It may or may not respond."
   fi

   runRemoteCmd "$sourceHost" \
      "$HMS_SCRIPTS/failover_p4d_from_this_host.sh $instance $style $failoverType $backupHost $NO_OP" \
      "Failing over from $sourceHost. Running: failover_p4d_from_this_host.sh I=$instance S=$style T=$failoverType BH=$backupHost NO_OP=$NO_OP" 1 1 0

   if [[ $RCMDEXITCODE -ne 0 ]]; then
      if [[ $style == Scheduled ]]; then
         errmsg "Shutdown of $role server FAILED during Scheduled Failover. Aborting failover of $fqComponent."
         return 1
      fi
      warnmsg "Shutdown of $role server failed during Unscheduled Failover. Continuing with failover of $fqComponent."
   fi

   runRemoteCmd "$backupHost" \
      "$HMS_SCRIPTS/failover_p4d_to_this_host.sh $instance $style $failoverType $newServerID $NO_OP" \
      "Failing over to $backupHost. Running: $HMS_SCRIPTS/failover_p4d_to_this_host.sh I=$instance S=$style T=$failoverType SID=$newServerID NO_OP=$NO_OP" 1 1 0

   if [[ $RCMDEXITCODE -ne 0 ]]; then
      errmsg "Failover of $fqComponent to backup host [$backupHost] failed."
      return 1
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: failover_p4d_edge ($path, $fqComponent, $style)
#
# Fails over an edge server. The edge host is read from
# FailoverMasterHost[<instance>:<path>].
#------------------------------------------------------------------------------
function failover_p4d_edge () {
   vvmsg "CALL: failover_p4d_edge ($*)"
   declare path=${1:-Unset}
   declare fqComponent=${2:-Unset}
   declare style=${3:-Unset}
   declare instance=
   declare edgeHost=
   declare newServerID=

   [[ $path == Unset || $fqComponent == Unset || $style == Unset ]] && return 1

   instance=${fqComponent%%:*}
   edgeHost=${FailoverMasterHost[$instance:$path]:-Unset}

   # newServerID is the server spec we want the edge server to have,
   # the ServerID of the original edge server: the path name carrying exactly
   # one 'p4d_' prefix, whether or not the path already has it.
   newServerID="p4d_${path#p4d_}"

   hms_failover_p4d "$path" "$fqComponent" "$style" edge "$edgeHost" "$newServerID"
}

#------------------------------------------------------------------------------
# Function: failover_p4d_master ($path, $fqComponent, $style)
#
# Fails over a master server. The master host is read from
# ComponentMasterHost[$fqComponent].
#------------------------------------------------------------------------------
function failover_p4d_master () {
   vvmsg "CALL: failover_p4d_master ($*)"
   declare path=${1:-Unset}
   declare fqComponent=${2:-Unset}
   declare style=${3:-Unset}
   declare masterHost=
   declare newServerID=

   [[ $path == Unset || $fqComponent == Unset || $style == Unset ]] && return 1

   masterHost=${ComponentMasterHost[$fqComponent]:-Unset}

   # newServerID is the server spec we want the master server to have. This is usually
   # 'master', but may have some other value.
   newServerID=${fqComponent#*:}

   hms_failover_p4d "$path" "$fqComponent" "$style" master "$masterHost" "$newServerID"
}

#------------------------------------------------------------------------------
# Function: hms_validate_failover_path ($instance, $path, $onFail)
#
# Private helper for failover(). Checks that the failover path is configured
# and active for the instance and that the hosts needed by its failover type
# are configured.
#
# $1 - SDP instance.
# $2 - failover path name.
# $3 - text appended to error messages ("Skipping it." or "Aborting.").
#
# Returns 0 if the instance can be failed over, 1 on a configuration error
# (already reported), 2 if the path exists but is inactive (caller reports).
#------------------------------------------------------------------------------
function hms_validate_failover_path () {
   declare instance=${1:-Unset}
   declare path=${2:-Unset}
   declare onFail=${3:-"Aborting."}
   declare fqPath="$instance:$path"
   declare pathActive=${FailoverActive[$fqPath]:-Unset}
   declare failoverType=${FailoverType[$fqPath]:-Unset}
   declare masterHost=${InstanceMasterHost[$instance]:-Unset}
   declare backupHost=${FailoverBackupHost[$fqPath]:-Unset}
   declare edgeHost=${FailoverMasterHost[$fqPath]:-Unset}

   if [[ $pathActive == 1 ]]; then
      msg "Verified: Failover path [$path] is active for instance [$instance]."
   elif [[ $pathActive == 0 ]]; then
      return 2
   else
      errmsg "No failover path [$path] is configured for instance [$instance]. $onFail"
      return 1
   fi

   if [[ $masterHost == Unset ]]; then
      errmsg "No master host is configured for instance [$instance]. $onFail"
      return 1
   fi

   # Edge failover types need the edge host configured for the path.
   if [[ $failoverType == "Edge"* && $edgeHost == Unset ]]; then
      errmsg "No edge host configured for instance:path [$fqPath]. $onFail"
      return 1
   fi

   # Confirm we have a configured target failover host (unless the failover
   # type is a local one).
   case "$failoverType" in
      (Full|MO|EdgeFull|EdgeMO)
         if [[ $backupHost == Unset ]]; then
            errmsg "No backup host configured for instance:path [$fqPath]. $onFail"
            return 1
         fi
         if [[ $failoverType == "Edge"* ]]; then
            msg "Instance $instance will failover from $edgeHost to $backupHost."
         else
            msg "Instance $instance will failover from $masterHost to $backupHost."
         fi
      ;;
      (Local)
         msg "Local Failover will use offline databases for instance $instance on host [$masterHost]."
      ;;
      (EdgeLocal)
         msg "Edge Local Failover will use offline databases for instance $instance on host [$edgeHost]."
      ;;
      (*)
         errmsg "Could not determine failover type for [$fqPath]. $onFail"
         return 1
      ;;
   esac

   return 0
}

#------------------------------------------------------------------------------
# Function: failover ($path, $scope, $style)
#
# $1 - failover path name.
# $2 - scope: "i:<instance>" for one SDP instance, or "h:<host>" for every
#      instance mastered on that host.
# $3 - failover style, passed through to the execute_* functions.
#
# Validates the request, asks for confirmation when $Interactive is 1, then
# fails over each targeted instance. Returns 0 only if validation and every
# failover succeeded.
#------------------------------------------------------------------------------
function failover () {
   vvmsg "CALL: failover ($*)"
   declare path=${1:-Unset}
   declare scope=${2:-Unset}
   declare style=${3:-Unset}
   declare scopeType=
   declare scopeHost=
   declare scopeInstance=
   declare fqPath=
   declare instance=
   declare instanceList=
   declare host=
   declare failoverType=
   declare edgeHost=
   declare reply=Unset
   declare -i found=0
   declare -i rc=0
   declare -i status=0

   # Validate instance scope.
   if [[ $scope == "i:"* ]]; then
      scopeType=Instance
      scopeInstance=${scope#i:}
      for instance in "${!InstanceManaged[@]}"; do
         if [[ "$scopeInstance" == "$instance" ]]; then
            found=1
            break
         fi
      done

      if [[ $found -eq 0 ]]; then
         errmsg "Invalid SDP instance specified with scope of $scope."
         return 1
      fi
      msg "Verified: Scope instance [$scopeInstance] is valid."

   # Validate host scope.
   elif [[ $scope == "h:"* ]]; then
      scopeType=Host
      scopeHost=${scope#h:}
      for host in "${FailoverMasterHost[@]}"; do
         if [[ "$scopeHost" == "$host" ]]; then
            found=1
            break
         fi
      done

      if [[ $found -eq 0 ]]; then
         errmsg "Invalid failover host specified with scope of $scope."
         return 1
      fi
      msg "Verified: Scope host [$scopeHost] is valid."
   else
      errmsg "Badly formatted scope value [$scope]."
      return 1
   fi

   # Verify the specified path is valid and active for each instance.
   if [[ $scopeType == Host ]]; then
      msg "Finding instances mastered on host $scopeHost."
      found=0
      for instance in "${!InstanceManaged[@]}"; do
         [[ "$scopeHost" == "${InstanceMasterHost[$instance]:-Unset}" ]] || continue

         rc=0
         hms_validate_failover_path "$instance" "$path" "Skipping it." || rc=$?

         if [[ $rc -eq 2 ]]; then
            # An inactive path is skipped without failing the whole request.
            warnmsg "Failover path [$path] is inactive for instance [$instance]. Skipping it."
            continue
         elif [[ $rc -ne 0 ]]; then
            status=1
            continue
         fi

         # Count every targeted instance, not just the first one.
         found+=1
         instanceList="${instanceList:+$instanceList }$instance"
      done

      if [[ $found -gt 0 ]]; then
         msg "$found instance(s) mastered on $scopeHost are targeted for failover: $instanceList."
      else
         errmsg "No instances mastered on host $scopeHost. Aborting failover."
         return 1
      fi
   else
      instance=$scopeInstance
      instanceList="$instance"

      rc=0
      hms_validate_failover_path "$instance" "$path" "Aborting." || rc=$?

      if [[ $rc -eq 2 ]]; then
         errmsg "Failover path [$path] is inactive for instance [$instance]. Aborting."
         return 1
      elif [[ $rc -ne 0 ]]; then
         return 1
      fi
   fi

   # Confirm user intent.
   if [[ $Interactive -eq 1 ]]; then
      while [[ $reply == Unset ]]; do
         echo -n -e "\nConfirm your intent to initiate failover: [Y/y/N/n]: "
         # Treat end-of-input as "not confirmed" instead of prompting forever.
         if ! read -r -e reply; then
            msg "Failover not confirmed. Failover aborted."
            return 1
         fi
         [[ ${reply^^} == Y || ${reply^^} == N ]] && break
         msg "\nInvalid input specified ('$reply'), specify Y or N only.\n"
         reply=Unset
      done
   else
      msg "Failover intent confirmed non-interactively with '-y'."
      reply=Y
   fi

   if [[ ${reply^^} == N ]]; then
      msg "Failover not confirmed. Failover aborted."
      return 1
   fi

   msg "Starting Failover for SDP instance(s): $instanceList."

   for instance in $instanceList; do
      # Look the type and edge host up per instance; with host scope the
      # targeted instances need not share a failover type.
      fqPath="$instance:$path"
      failoverType=${FailoverType[$fqPath]:-Unset}

      if [[ "$failoverType" != "Edge"* ]]; then
         execute_instance_failover "$instance" "$path" "$style" || status=1
      else
         edgeHost=${FailoverMasterHost[$fqPath]:-Unset}
         execute_edge_failover "$instance" "$path" "$style" "$edgeHost" || status=1
      fi
   done

   return $status
}

#------------------------------------------------------------------------------
# Function: get_component_list ($target, $onlyManaged)
#
# Given a target as specified on the command line, return a valid list of
# components, or return 1 if we fail to return a valid list of components.
#
# The target may look like: all|<instance>[:<component>]
#
# If called with a specified component, i.e.
# something like 1:p4d01, then
# show a warning if that component is not managed.
#
# If called with 'all' or an instance name, get the list of all defined
# components (globally or for the specified instance). When $onlyManaged is 1,
# only components whose ComponentManaged value is 1 are listed; otherwise all
# defined components are listed regardless of whether they are managed.
#
# Output: The output to stdout is the valid component list. Any warnings or
# errors are sent to stderr. An exit code of 0 indicates valid components
# were returned.
#
# Sample Usage: list=$(get_component_list "abc") || bail "No valid instances!"
#
# NOTE: stdout of this function is data consumed by callers via $(...), so
# nothing but the component list may be written to it.
#------------------------------------------------------------------------------
function get_component_list () {
   declare target=${1:-Unset}
   declare onlyManaged=${2:-0}
   declare component=
   declare fqComponent=
   declare instanceComponents=
   declare componentList=
   declare componentManaged=

   [[ $target == Unset ]] && return 1

   if [[ $target == "all" ]]; then
      for component in "${!ComponentManaged[@]}"; do
         # Build a list of all defined components, or only managed ones,
         # depending on $onlyManaged.
         if [[ $onlyManaged -eq 1 ]]; then
            componentManaged=${ComponentManaged[$component]:-Invalid}
            if [[ $componentManaged == Invalid ]]; then
               errmsg "Component $component is invalid." >&2
               continue
            fi
            [[ $componentManaged == 1 ]] || continue
         fi
         componentList="${componentList:+$componentList }$component"
      done
   elif [[ $target == *":"* ]]; then
      # If target contains a colon, user specified a single component
      # named as "instance:component".
      componentManaged=${ComponentManaged[$target]:-Invalid}

      if [[ $componentManaged == Invalid ]]; then
         errmsg "The specified component $target is invalid." >&2
         return 1
      fi

      if [[ $componentManaged -eq 0 ]]; then
         warnmsg "Component $target is not managed by HMS." >&2
      fi
      componentList=$target
   else
      # Get all components for a given instance. In this case, the
      # $target was specified as just an SDP instance name.
      instanceComponents=${InstanceComponents[$target]:-Unset}
      if [[ $instanceComponents == Unset ]]; then
         errmsg "No components configured for instance $target." >&2
         return 1
      fi

      # $instanceComponents is a blank-separated list; splitting is intended.
      for component in $instanceComponents; do
         fqComponent=$target:$component
         # Build a list of all defined components, or only managed ones,
         # depending on $onlyManaged.
         if [[ $onlyManaged -eq 1 ]]; then
            componentManaged=${ComponentManaged[$fqComponent]:-Invalid}
            if [[ $componentManaged == Invalid ]]; then
               errmsg "Component $fqComponent is invalid." >&2
               continue
            fi
            [[ $componentManaged == 1 ]] || continue
         fi
         componentList="${componentList:+$componentList }$fqComponent"
      done
   fi

   [[ -n "$componentList" ]] || return 1

   printf '%s\n' "$componentList"
   return 0
}

#------------------------------------------------------------------------------
# Function: get_component_status ($component, $optimistic, $tag)
#
# Input:
# $1 - A fully qualified component name of the form <Instance>:<ComponentName>
#
# $2 - "Optimistic" setting, 1 or 0 (the default). By default, components that
# don't yet have status logic coded report as failed. If 1, components of
# known types report as OK. Components of unknown type always report as
# failed. (Any such components should be marked inactive in the topology
# config file).
#
# $3 - optional tag for broker components, e.g. 'dfm'.
#
# Exit Code:
# Returns a "happy" '0' if component is up/online. Otherwise, returns one of
# the following:
#  1 Component down.
#  2 Component offline (applies to broker only).
# 11 Status not yet implemented for this component type.
# 12 Unknown component type.
# 13 Component host not configured.
# 99 Unknown (internal error).
#
# Output: In all cases, a message is displayed indicating component status.
#
#------------------------------------------------------------------------------
function get_component_status () {
   vvmsg "CALL: get_component_status ($*)"
   declare component=${1:-Unset}
   declare optimistic=${2:-0}
   declare tag=${3:-default}
   declare instance=
   declare componentType=
   declare componentURL=
   declare componentManaged=
   declare componentMasterHost=
   declare componentP4PORT=
   declare componentStatusCmd=
   declare -i componentStatusCode=0
   declare componentStatusMsg=
   declare componentVersion=
   declare brokerStatusFile=$P4U_TMPDIR/broker_status.${component/:/_}.out

   [[ $component == Unset ]] && return 1

   # get_component_version records its result in ComponentVersion[].
   get_component_version "$component"
   componentVersion=${ComponentVersion[$component]:-UNKNOWN}

   instance=${component%%:*}
   componentType=${ComponentType[$component]:-Unset}
   componentURL=${ComponentURL[$component]:-Unset}
   componentManaged=${ComponentManaged[$component]:-Unset}
   componentMasterHost=${ComponentMasterHost[$component]:-Unset}

   if [[ "$componentMasterHost" == Unset ]]; then
      errmsg "No master host configured for component $component."
      componentStatusCode=13
   elif [[ $componentManaged == 1 ]]; then
      case "$componentType" in
         (p4d-mc|p4d-e|p4d-r)
            # For SSL the P4PORT form is ssl:<host>:<port>.
            if [[ $componentURL == "ssl:"* ]]; then
               componentP4PORT="ssl:$componentMasterHost:${componentURL#ssl:}"
            else
               componentP4PORT="$componentMasterHost:$componentURL"
            fi
            componentStatusCmd="$P4CBIN/irun -i hms -L off -v1 $P4BIN -s -p $componentP4PORT info -s"
            componentStatusCode=0
            run "$componentStatusCmd" \
               "3:Checking status for $component of type $componentType.|4:Checking with command: $componentStatusCmd" 0 0 ||\
               componentStatusCode=1
         ;;
         (p4broker)
            componentStatusCmd="/p4/${instance}/bin/p4broker_${instance}_init status $tag"
            msg "Checking status of $component on $componentMasterHost"
            vmsg "Executing: ssh -q -n -l $OSUSER $componentMasterHost \"$componentStatusCmd\""
            ssh -q -n -l "$OSUSER" "$componentMasterHost" "$componentStatusCmd" > "$brokerStatusFile" 2>&1
            [[ $VERBOSITY -gt 3 ]] && cat "$brokerStatusFile"

            # The broker state is derived from the text printed by the init
            # script's 'status' action.
            if grep -qF -- "is running as" "$brokerStatusFile" 2>/dev/null; then
               componentStatusCode=0
               msg "Verified: $component is OK."
            elif grep -qF -- "is NOT running" "$brokerStatusFile" 2>/dev/null; then
               componentStatusCode=2
               msg "Verified: $component is offline."
            else
               componentStatusCode=1
               errmsg "Component $component status unknown."
            fi
            rm -f "$brokerStatusFile"
         ;;
         (p4p|p4dtg|p4web|p4gf)
            warnmsg "Sorry, can't status components of type $componentType just yet."
            componentStatusCode=11
            [[ $optimistic -eq 1 ]] && componentStatusCode=0
         ;;
         (swarm)
            warnmsg "Sorry, can't status components of type swarm just yet."
            msg "Check the Swarm worker queue with wget using the configured URL ($componentURL)."
            componentStatusCode=11
            [[ $optimistic -eq 1 ]] && componentStatusCode=0
         ;;
         # 'Unset' must be tested before the catch-all, otherwise this arm
         # (and status code 12) can never be reached.
         (Unset)
            errmsg "ComponentType not defined for component $component."
            componentStatusCode=12
         ;;
         (*)
            warnmsg "Unknown component type ($componentType) configured."
            componentStatusCode=11
         ;;
      esac
   else
      warnmsg "Component $component is not managed by HMS. Assuming it is OK."
      componentStatusCode=0
   fi

   case "$componentStatusCode" in
      (0) componentStatusMsg=OK;;
      (1) componentStatusMsg=FAILED;;
      (2) componentStatusMsg=Offline;;
      (11) componentStatusMsg="Status not yet implemented for this component type.";;
      (12) componentStatusMsg="Unknown component type.";;
      (13) componentStatusMsg="Component host not configured.";;
      (*)
         componentStatusMsg="Unknown Status Code [$componentStatusCode]"
         componentStatusCode=99
      ;;
   esac

   # Publish the result for callers that inspect the arrays afterwards.
   ComponentStatusCode[$component]=$componentStatusCode
   ComponentStatusMsg[$component]=$componentStatusMsg

   if [[ "$componentType" == p4broker ]]; then
      msg "Component $component v$componentVersion ($tag config) status is $componentStatusMsg (code $componentStatusCode)."
   else
      msg "Component $component v$componentVersion status is $componentStatusMsg (code $componentStatusCode)."
   fi

   return $componentStatusCode
}

#------------------------------------------------------------------------------
# Function: get_component_version ($component)
#
# Set values in ComponentMajorVersion, ComponentMinorVersion, and
# ComponentVersion associative arrays, indexed by the fully qualified
# component name (i.e. <instance>:<compoment_name>).
#
# Output: This get_component_version() is a quiet function. It does its work
# silently. In event of problems, it just sets the version values to UNKNOWN.
# It doesn't complain. This is called by get_component_status, which takes
# care of notifying the user of problems.
#------------------------------------------------------------------------------
function get_component_version () {
   vvmsg "CALL: get_component_version ($*)"
   declare component=${1:-Unset}
   declare instance=
   declare componentMasterHost=
   declare componentType=
   declare componentExe=
   declare versionLine=
   declare unused=
   # Default to UNKNOWN so component types with no version logic (including
   # an unset type) never produce an empty or "." version.
   declare componentMajorVersion=UNKNOWN
   declare componentMinorVersion=UNKNOWN
   declare componentVersionFile=$P4U_TMPDIR/component_version.${component/:/_}.out

   instance=${component%%:*}
   componentType=${ComponentType[$component]:-Unset}
   componentMasterHost=${ComponentMasterHost[$component]:-Unset}

   case "$componentType" in
      (p4d-*|p4broker|p4p)
         # The executable name is the type with any '-<suffix>' removed,
         # e.g. p4d-mc -> /p4/<instance>/bin/p4d_<instance>.
         componentExe="/p4/$instance/bin/${componentType%-*}_${instance}"

         # -n keeps ssh from consuming the caller's stdin.
         ssh -q -n -l "$OSUSER" "$componentMasterHost" "$componentExe -V" > "$componentVersionFile" 2>&1
         versionLine=$(grep -m1 -F -- "Rev. " "$componentVersionFile" 2>/dev/null)
         rm -f "$componentVersionFile"

         # The 'Rev.' line is slash-delimited; field 3 is the release and
         # field 4 starts with the patch level, followed by a blank.
         if [[ -n "$versionLine" ]]; then
            IFS=/ read -r unused unused componentMajorVersion componentMinorVersion unused <<< "$versionLine"
            componentMinorVersion=${componentMinorVersion%% *}
            componentMajorVersion=${componentMajorVersion:-UNKNOWN}
            componentMinorVersion=${componentMinorVersion:-UNKNOWN}
         fi
      ;;
      (p4dtg|p4web|p4gf)
         warnmsg "Sorry, can't get version for component of type $componentType just yet."
      ;;
      (swarm)
         ssh -q -n -l "$OSUSER" "$componentMasterHost" "cat /p4/$instance/swarm/Version" > "$componentVersionFile" 2>&1

         # A Swarm version file has 3 lines and looks like this:
         # RELEASE = 2016 1 ;
         # PATCHLEVEL = 1400259 ;
         # SUPPDATE = 2016 06 13 ;
         # Extract the RELEASE and PATCHLEVEL and normalize to our standard format.
         versionLine=$(grep -m1 -F -- RELEASE "$componentVersionFile" 2>/dev/null)
         if [[ -n "$versionLine" ]]; then
            componentMajorVersion=${versionLine##* = }
            componentMajorVersion=${componentMajorVersion% ;}
            componentMajorVersion=${componentMajorVersion/ /.}
         fi

         versionLine=$(grep -m1 -F -- PATCHLEVEL "$componentVersionFile" 2>/dev/null)
         if [[ -n "$versionLine" ]]; then
            componentMinorVersion=${versionLine##* = }
            componentMinorVersion=${componentMinorVersion% ;}
         fi
         rm -f "$componentVersionFile"
      ;;
   esac

   ComponentMajorVersion[$component]=$componentMajorVersion
   ComponentMinorVersion[$component]=$componentMinorVersion

   if [[ $componentMajorVersion != UNKNOWN ]]; then
      ComponentVersion[$component]="$componentMajorVersion.$componentMinorVersion"
   else
      ComponentVersion[$component]=UNKNOWN
   fi
}
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 24292 | gmc | "Forking branch Dev of perforce-software-sdp to gmc-sdp." | ||
//guest/perforce_software/sdp/dev/Server/Unix/p4/common/lib/hms_actions.sh | |||||
#9 | 24224 | C. Thomas Tyler |
Enhanced HMS failover to support failover of edge server. Bypassing pre-commit review as this has been tested in the Battle School Lab and contains a needed fix. #review-24225 |
||
#8 | 23747 | C. Thomas Tyler |
Fixed hms status issues. Fixed issue getting status of SSL-enabled servers requiring SDP environment (including P4TRUST) to be set, incorporating new irun utility. Fixed issue with illegal negative exit codes in get_component_status(). Added in-code docs for component status values. Fixed get_component_list() to remove call-stack info in high-verbosity mode, due to text output being interpreted as data. |
||
#7 | 23454 | cgeen |
Update to hms script to add two new functions: pull and df. Pull issues a pull -lj on all the edge/replica servers to get the status; df issues a diskspace on all the servers. The reason for this is that with a hardened edge environment the P4USER does not have permissions to run these commands on the edge servers. Therefore we need to run them centrally as a super user. This replaces the replica_status commands on the edge. The update also includes a change to p4_vars to change the P4USER on an edge server. The idea is that P4USER only has super privs on the master; on the replica/edge servers the default user can only have admin privs. This is so the box administrator in restricted regions can't change the protection table. Hence the need for pull and df centrally, as these are the only commands that can't be run on the replicas/edges from the maintenance scripts. It also goes without saying that ssh should only go out from the commit and never back. |
||
#6 | 21720 | C. Thomas Tyler |
Fixed bug where a sophisticated topology with multiple failover targets defined (e.g. 'ha' and 'dr' options) could failover to the wrong host, using the component backup host rather than the one associated with the specified failover target. Bypassing pre-commit review as this has been tested in the Battle School Lab, and I need to push this fix out. #review-21721 |
||
#5 | 21410 | C. Thomas Tyler | Tweaked to support SSL usage. | ||
#4 | 21033 | C. Thomas Tyler |
Fixed hms hang issue during local failover by replacing raw ssh call with a call to the robust runRemoteCmd() function. Also enhanced to detect ServerID based on configured server spec name rather than the hard-coded value of 'master' used during development. |
||
#3 | 20853 | C. Thomas Tyler | Fixed hms bug discovered in failback testing; master host was wrong. | ||
#2 | 20804 | C. Thomas Tyler | Fixed ssh hang issues. | ||
#1 | 20745 | C. Thomas Tyler |
Approving as is since it isn't changing core SDP functionality, and reviewing it all line by line will take some time. We can do that as we move forward with it. First addition of HMS v1.0 files. This change is a soft launch HMS for initial deployment and testing. Updates to HMS-related files are expected and will bypass pre-commit code review until stabilized. |