#!/bin/bash
#
# This tool generates configuration files for the ingest process,
# starts ingestion, and stops ingestion.
#
# $Id: //guest/cmclouth/projects/htd-deployment/htd-source/tools/runingest.sh#12 $
#
# All functions below read the global settings populated by loadglobalvars
# (ingestworkers, ingestroot, ingesting, ingested, logroot, auditconverter,
# ingestconf, ingesttemplate, p4projectdepth, scmtype, repoformat, tenantid,
# useauditconverter).
#

#######################################
# Escape a value so it can be used verbatim as the replacement part of a
# sed "s|...|...|" command (backslash, '&' and the '|' delimiter are special
# there).
# Arguments: $1 - raw value
# Outputs:   escaped value on stdout
#######################################
function _sedreplacement() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

#######################################
# Read one "key = value" setting from a settings file.
# Comment lines (first non-blank character '#') are ignored, the value must be
# a single token without whitespace, and the first definition wins.
# Arguments: $1 - key name, $2 - settings file
# Outputs:   the value on stdout (nothing when the key is absent)
#######################################
function _readconfvalue() {
  grep -v '^\s*#' "$2" \
    | sed -n "s/^\s*$1\s*=\s*\(\S\+\)\s*$/\1/p" \
    | head -n 1
}

#######################################
# Print the process ids reported by jps whose line mentions "ingest"
# (case-insensitive), one per line.
#######################################
function _ingestjvmpids() {
  jps | awk 'tolower($0) ~ /ingest/ { print $1 }'
}

#######################################
# Generate the ingest configuration files for one depot depth.
# One file "$ingestconf/ingestd<depth>w<worker>.conf" is written per worker by
# rewriting the scmType, p4ProjectDepth, tenantID, ingestedFolder,
# ingestErrorFolder, ingestFolder, ingestingFolder and logDirectory lines of
# "$ingesttemplate"; all other template lines are copied unchanged.
# Globals:   ingested, ingesting, logroot, ingestconf, ingesttemplate,
#            scmtype, repoformat, tenantid (all read)
# Arguments: $1 - depot depth, $2 - number of workers for that depth
#######################################
function genoneconfig() {
  local depth=$1
  local workers=$2
  local worker configfile scmvalue tenantvalue
  local ingestedvalue errorvalue ingestingvalue ingestvalue logvalue

  # Values that are identical for every worker of this depth.
  ingestedvalue=$(_sedreplacement "$ingested")
  errorvalue=$(_sedreplacement "$ingesting/ingesterror")
  ingestingvalue=$(_sedreplacement "$ingesting")
  tenantvalue=$(_sedreplacement "$tenantid")

  scmvalue=$(_sedreplacement "$scmtype")
  if [ "$scmtype" == "repository" ]; then
    # The literal "\n" is turned into a newline by (GNU) sed, so the scmType
    # line is followed by a repoFormat line. The setting is loaded into the
    # lower-case global "repoformat".
    scmvalue="${scmvalue}\\nrepoFormat=$(_sedreplacement "$repoformat")"
  fi

  for ((worker = 1; worker <= workers; worker++)); do
    ingestvalue=$(_sedreplacement "$ingesting/d$depth/w$worker")
    logvalue=$(_sedreplacement "$logroot/d$depth/w$worker")
    configfile="$ingestconf/ingestd${depth}w${worker}.conf"
    echo "Generate ingest configuration file: $configfile"

    sed \
      -e "s|^\s*scmType\s*=.*$|scmType=${scmvalue}|" \
      -e "s|^\s*p4ProjectDepth\s*=.*$|p4ProjectDepth=${depth}|" \
      -e "s|^\s*tenantID\s*=.*$|tenantID=${tenantvalue}|" \
      -e "s|^\s*ingestedFolder\s*=.*$|ingestedFolder=${ingestedvalue}|" \
      -e "s|^\s*ingestErrorFolder\s*=.*$|ingestErrorFolder=${errorvalue}|" \
      -e "s|^\s*ingestFolder\s*=.*$|ingestFolder=${ingestvalue}|" \
      -e "s|^\s*ingestingFolder\s*=.*$|ingestingFolder=${ingestingvalue}|" \
      -e "s|^\s*logDirectory\s*=.*$|logDirectory=${logvalue}|" \
      "$ingesttemplate" > "$configfile"
  done
}

#######################################
# Generate all configuration files: one single-worker configuration for every
# depth below p4projectdepth, and "ingestworkers" configurations for
# p4projectdepth itself. Existing "$ingestconf"/ingest*.conf files are deleted
# first, after interactive confirmation.
# Globals: ingestconf, p4projectdepth, ingestworkers (read)
# Returns: 0, also when the user cancels
#######################################
function genconfig() {
  local -a existing=()
  local candidate yn depth

  # Collect existing files with a glob instead of parsing ls output; an
  # unmatched glob stays literal, hence the -e check.
  for candidate in "$ingestconf"/ingest*.conf; do
    if [ -e "$candidate" ]; then
      existing+=("$candidate")
    fi
  done

  if [ "${#existing[@]}" -gt 0 ]; then
    echo "Are you sure you wish to delete existing configuration files?"
    select yn in "Yes" "No"; do
      case $yn in
        Yes)
          break
          ;;
        No)
          echo "GenerateConfigFiles cancelled"
          return 0
          ;;
      esac
    done
    rm -- "${existing[@]}"
  fi

  # generate subordinate configurations with 1 worker
  for ((depth = 1; depth < p4projectdepth; depth++)); do
    genoneconfig "$depth" 1
  done

  # generate primary configuration with N workers
  genoneconfig "$p4projectdepth" "$ingestworkers"
}

#######################################
# Stop everything started by runstartingest: the ingestprocessmgr script, the
# ingest java processes (TERM first, KILL for survivors) and the log tails.
# A kill that fails (for example because the process has already exited) is
# reported by kill itself but must not abort the script under "set -e".
#######################################
function runstopingest() {
  local job

  # stop the delegator
  for job in $(pgrep -f 'ingestprocessmgr.*\.sh'); do
    kill "$job" || true
  done

  # stop the java process
  for job in $(_ingestjvmpids); do
    kill "$job" || true
    sleep 2
  done

  # stop the logtail; the tailed file is ingest.<tenantid>.log, and the tenant
  # id may be longer than one character
  for job in $(pgrep -f 'tail -f.*ingest\.[^[:space:]]+\.log'); do
    kill "$job" || true
  done

  # stop the java process forcibly
  for job in $(_ingestjvmpids); do
    kill -9 "$job" || true
  done
}

#######################################
# Read the folder settings of one generated ingest configuration file, create
# missing folders, and derive depth and worker from the file name.
# Globals written: ingestedFolder, ingestErrorFolder, ingestFolder,
#                  ingestingFolder, logDirectory (exported, blanks removed);
#                  depotdepth, workerindex (left unset when the file name does
#                  not look like <letters>d<depth>w<worker>.<ext>)
# Arguments: $1 - path of the configuration file
#######################################
function validateconfig() {
  local conffile=$1
  local field folder filename

  for field in ingestedFolder ingestErrorFolder ingestFolder ingestingFolder logDirectory; do
    # Anchor on the key so that neither comment lines nor other keys that
    # merely contain the name are picked up.
    folder=$(grep "^\s*${field}\s*=" "$conffile" \
      | tr -d '[:blank:]' \
      | cut -d= -f2 \
      | head -n 1)
    if [ -n "$folder" ] && [ ! -d "$folder" ]; then
      echo "creating directory $folder"
      mkdir -p "$folder"
    fi
    export "$field=$folder"
  done

  # set worker and depth
  unset depotdepth
  unset workerindex
  filename=$(basename "$conffile")
  if [[ "$filename" =~ ^[[:alpha:]]+d([[:digit:]]+)w([[:digit:]]+)\. ]]; then
    depotdepth=${BASH_REMATCH[1]}
    workerindex=${BASH_REMATCH[2]}
  fi
}

#######################################
# Start an ingest process for every configuration file below "$ingestconf",
# attach ingesterrorhandler.sh to the log of every depth other than 1, and
# start the process manager that distributes files to the workers of depth
# p4projectdepth. Running ingest jobs are stopped first (after confirmation).
# Globals: ingestconf, p4projectdepth, tenantid, logroot, ingestroot,
#          useauditconverter, auditconverter (read)
# Returns: 0 on success or when cancelled, 1 when an ingest log never appears
#######################################
function runstartingest() {
  local existingjobs yn configfile priordepth retrycount logfile
  local -a w1ingestdirectories=()
  local -a allingestdirectories=()
  local -a configfiles=()

  # grep -c exits non-zero on a count of 0; that must not trip "set -e".
  existingjobs=$(jps | grep -ci "ingest" || true)
  if [ "${existingjobs:-0}" -gt 0 ]; then
    echo "There are existing Ingest jobs running. Would you like to stop them?"
    select yn in "Yes" "No"; do
      case $yn in
        Yes)
          break
          ;;
        No)
          echo "StartIngest cancelled"
          return 0
          ;;
      esac
    done
  fi

  # stop any running jobs
  runstopingest

  # Match on the file name only, so that a parent directory whose name starts
  # with "ingest" does not pull in unrelated *.conf files.
  mapfile -t configfiles < <(find "$ingestconf" -type f -name 'ingest*.conf' | LC_ALL=C sort)

  # start ingest process for each configuration
  for configfile in "${configfiles[@]}"; do
    validateconfig "$configfile"
    if [ -n "${workerindex:-}" ] && [ "$workerindex" -eq 1 ]; then
      w1ingestdirectories[depotdepth]="$ingestFolder"
    fi
    if [ -n "${depotdepth:-}" ] && [ "$depotdepth" -eq "$p4projectdepth" ]; then
      allingestdirectories+=("$ingestFolder")
    fi
    /opt/interset/analytics/bin/ingest.sh "$configfile"
  done

  # tail the log and send to ingest of depth - 1
  for configfile in "${configfiles[@]}"; do
    validateconfig "$configfile"
    if [ -n "${depotdepth:-}" ] && [ "$depotdepth" -ne 1 ]; then
      echo "starting ingesterrorhandler.sh for depotdepth=$depotdepth worker=$workerindex"
      priordepth=$((depotdepth - 1))
      logfile="$logDirectory/ingest.$tenantid.log"

      # make sure we have a log file: poll up to 10 times, 3 seconds apart
      retrycount=10
      while [ "$retrycount" -gt 0 ] && [ ! -e "$logfile" ]; do
        echo "waiting for $logfile"
        sleep 3
        # plain assignment: "((retrycount -= 1))" returns non-zero when the
        # result is 0 and would abort the script under "set -e"
        retrycount=$((retrycount - 1))
      done
      if [ ! -e "$logfile" ]; then
        echo "Problem with ingest. nothing to tail"
        return 1
      fi

      tail -f "$logfile" \
        | ./ingesterrorhandler.sh "${w1ingestdirectories[priordepth]}" \
          &>> "$logDirectory/ingesterrorhandler.log" &
    fi
  done

  # Start job to distribute to workers
  if [ "$useauditconverter" == "1" ]; then
    ./ingestprocessmgr2.sh "$auditconverter" "$ingestedFolder" "$ingestroot" "*.gz" \
      "${allingestdirectories[@]}" &>> "$logroot/ingestprocessmgr.log" &
  else
    ./ingestprocessmgr.sh "$ingestroot" "*.gz" \
      "${allingestdirectories[@]}" &>> "$logroot/ingestprocessmgr.log" &
  fi
}

#######################################
# Load the global settings from the settings file.
# Globals written: HTD_CONFIGURE_CONF (canonical path of the file) and one
#                  variable per setting: ingestworkers, ingestroot, ingesting,
#                  ingested, logroot, auditconverter, ingestconf,
#                  ingesttemplate, p4projectdepth, scmtype, repoformat,
#                  tenantid, useauditconverter (empty when not configured)
# Arguments: $1 - path of the settings file
# Exits with status 1 when the file does not exist.
#######################################
function loadglobalvars() {
  local key

  # validate the configuration file
  if [ ! -e "$1" ]; then
    echo "configuration file: $1 doesn't exist"
    exit 1
  fi
  HTD_CONFIGURE_CONF=$(readlink -f "$1")

  for key in ingestworkers ingestroot ingesting ingested logroot \
    auditconverter ingestconf ingesttemplate p4projectdepth scmtype \
    repoformat tenantid useauditconverter; do
    # printf -v assigns the global variable named by $key
    printf -v "$key" '%s' "$(_readconfvalue "$key" "$HTD_CONFIGURE_CONF")"
  done
}

set -e # Exit immediately on non-zero status.
# ---------------------------------------------------------------------------
# Main flow: load the settings file given as the first argument, verify that
# every required setting and path is present, then offer the interactive menu
# until "quit" is selected.
# ---------------------------------------------------------------------------
if [ $# -eq 0 ]; then
  echo "Usage: $0 </Path/to/conf_file>"
  echo "Example: $0 /home/interset/tools/ingest.conf"
  exit 1
fi

loadglobalvars "$1"

# Make sure configuration is set. The order matters: scmtype is checked before
# repoformat, which is only mandatory for scmtype "repository".
for setting in ingestworkers ingestroot ingesting ingested logroot \
  auditconverter ingestconf ingesttemplate p4projectdepth scmtype \
  repoformat tenantid useauditconverter; do
  if [ "$setting" == "repoformat" ] && [ "$scmtype" != "repository" ]; then
    continue
  fi
  if [ -z "${!setting}" ]; then
    echo "$setting not set!"
    exit 1
  fi
done

# These two settings are used in integer comparisons when generating and
# starting configurations, so reject anything that is not a plain number.
for setting in ingestworkers p4projectdepth; do
  if ! [[ "${!setting}" =~ ^[0-9]+$ ]]; then
    echo "$setting must be a non-negative integer (got '${!setting}')"
    exit 1
  fi
done

# With the audit converter in use the scm type is forced to perforce.
if [ "$useauditconverter" == "1" ]; then
  scmtype="perforce"
fi

# configure settings
for setting in HTD_CONFIGURE_CONF ingestworkers ingestroot ingesting ingested \
  logroot auditconverter ingestconf ingesttemplate p4projectdepth scmtype \
  repoformat tenantid useauditconverter; do
  echo "$setting=${!setting}"
done

#######################################
# Exit with status 1 unless the given directory exists.
# Arguments: $1 - label used in the message, $2 - directory path
#######################################
function _requiredir() {
  if [ ! -d "$2" ]; then
    echo "$1 directory: $2 doesn't exist"
    exit 1
  fi
}

_requiredir "ingest" "$ingestroot"
_requiredir "ingesting" "$ingesting"
_requiredir "ingested" "$ingested"
_requiredir "log" "$logroot"
_requiredir "conf" "$ingestconf"

# validate the source ingestion configuration
if [ ! -e "$ingesttemplate" ]; then
  echo "template configuration file: $ingesttemplate doesn't exist"
  exit 1
fi

# validate the auditconverter exists
if [ "$useauditconverter" == "1" ] && [ ! -e "$auditconverter" ]; then
  echo "auditconverter: $auditconverter doesn't exist"
  exit 1
fi

# menu
action="nothing"
until [ "$action" = "quit" ]; do
  echo "Select option to perform"
  select action in "GenerateConfigFiles" "StartIngest" "StopIngest" "quit"; do
    case $action in
      GenerateConfigFiles)
        genconfig
        break
        ;;
      StartIngest)
        runstartingest
        break
        ;;
      StopIngest)
        runstopingest
        break
        ;;
      quit)
        break
        ;;
    esac
  done
done

# Leaving the menu through "quit" is the normal way to end the tool.
exit 0
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#12 | 18183 | Charlie McLouth | move ingesterror folder under ingesting instead of ingestroot | ||
#11 | 18182 | Charlie McLouth | Move in process ingestions to ingesting configuration | ||
#10 | 18180 | Charlie McLouth | Made the ingesting directory configurable | ||
#9 | 18159 | Charlie McLouth | Two changes: (1) allow the ingested directory (already processed) to be a configuration option; (2) to prevent existing files from being overwritten, first brand all incoming files with a date and time. | ||
#8 | 18158 | Charlie McLouth | Changed filetype to ktext | ||
#7 | 18157 | Charlie McLouth | Merging changes | ||
#6 | 18150 | Charlie McLouth | Add first level of support for auditconverter | ||
#5 | 18149 | Charlie McLouth | add tenantID to logfile name | ||
#4 | 18137 | Charlie McLouth | Move output file to the ingest directory | ||
#3 | 18136 | Charlie McLouth | retry the kill of the ingest job forcibly | ||
#2 | 18134 | Charlie McLouth | repoFormat is required for scmType perforce | ||
#1 | 18132 | Charlie McLouth | Adding tools for handling ingest configuration and job management |