#!/bin/bash
#
# A simple tool that processes the ingest log for records that failed to be
# ingested. Found records are written to compressed log files in the supplied
# output directory.
#
# $Id: //guest/cmclouth/projects/htd-deployment/htd-source/tools/ingesterrorhandler.sh#7 $
#
# Finalize the active error log, if there is one: gzip it into OUTPUTDIR,
# delete the uncompressed copy and mark no log as active.
# Globals:
#   ACTIVELOG  (read/write) - path of the open log; emptied on close
#   OUTPUTDIR, TEMPLOG, LOGCOUNTER (read) - used to name the archive
# Outputs:
#   Progress messages on stdout.
closelog() {
  echo "closelog"
  # Nothing open means nothing to archive.
  [ -n "$ACTIVELOG" ] || return 0

  # The archive name carries the PID and the log sequence number.
  local archive="$OUTPUTDIR/$TEMPLOG.$$.$LOGCOUNTER.gz"

  echo "compressing $ACTIVELOG"
  gzip -c "$ACTIVELOG" > "$archive"

  echo "removing $ACTIVELOG"
  rm "$ACTIVELOG"
  ACTIVELOG=""
}
# Start a new error log when none is active; does nothing otherwise.
# Globals:
#   ACTIVELOG  (read/write) - set to the path of the newly created log
#   LOGCOUNTER (read/write) - incremented for each new log
#   ROWCOUNT, ERRORCOUNT (write) - reset to 0 for the new log
#   TEMPLOGDIR, TEMPLOG (read) - directory and base name of the log
# Outputs:
#   Progress messages on stdout.
function openlog() {
  echo "openlog"
  if [ -z "$ACTIVELOG" ]; then
    # Plain assignment instead of (( )): its exit status never depends on the
    # resulting value, so it cannot trip `set -e`.
    LOGCOUNTER=$((LOGCOUNTER + 1))
    ROWCOUNT=0
    ERRORCOUNT=0
    ACTIVELOG="$TEMPLOGDIR/$TEMPLOG.$$.$LOGCOUNTER"
    echo "creating $ACTIVELOG"
    # Quoted so a log directory containing whitespace is treated as one path.
    touch "$ACTIVELOG"
  fi
}
# Called when a line marking the start of a file ingestion is seen.
# Delegates to closelog, so any log still active at that point is finalized.
ingestfilestart() {
  closelog
}
# Called when a line marking the end of a file ingestion is seen, and on
# shutdown. Delegates to closelog to finalize the active log, if any.
ingestfilestop() {
  closelog
}
# Extract the failed record from one ingest-log line and append it to the
# active error log, opening a new log via openlog when none is active.
# Records whose reason is "empty user, action or project" are skipped.
# Globals:
#   INGESTERROR (read)       - marker text that precedes the record
#   ACTIVELOG   (read)       - path of the active error log
#   ERRORCOUNT  (read/write) - incremented for every record written
# Arguments:
#   $1 - the log line containing the INGESTERROR marker
# Outputs:
#   "ignoring reason (...)" on stdout for skipped records.
function ingestrecorderror() {
  # echo "ingestrecorderror"
  local localrow="$1"
  local errorreason errorrecord
  # printf with a quoted argument passes the row through untouched; an
  # unquoted echo would collapse whitespace runs and expand glob characters
  # (e.g. '*') found in the record into file names.
  # The reason is the text inside the last trailing " (...)" of the line.
  errorreason=$(printf '%s\n' "$localrow" | sed -n 's/.* (\(.*\))$/\1/p')
  # NOTE(review): the first \(.*\) is greedy, so the optional trailing
  # " (...)" group appears never to strip the reason from the record —
  # confirm whether that is intended before changing the pattern.
  errorrecord=$(printf '%s\n' "$localrow" | sed -n "s/$INGESTERROR\(.*\)\( (.*)\)\{,1\}$/\1/p")
  # echo "errorreason=$errorreason"
  # test for error reason
  case "$errorreason" in
    "empty user, action or project")
      echo "ignoring reason ($errorreason)"
      ;;
    *)
      if [ -z "$ACTIVELOG" ]; then
        openlog
      fi
      ERRORCOUNT=$((ERRORCOUNT + 1))
      printf '%s\n' "$errorrecord" >> "$ACTIVELOG"
      #echo "ROWCOUNT=$ROWCOUNT;ERRORCOUNT=$ERRORCOUNT"
      ;;
  esac
}
# SIGINT handler: announce the interrupt, finalize whatever log is active
# through ingestfilestop, then terminate with exit status 1.
ctrl_c() {
  echo "ctrl_c"
  # flush and archive the active log before leaving
  ingestfilestop
  exit 1
}
# Register the handler for Ctrl-C (SIGINT).
trap ctrl_c INT
# Main script body.
#   $1 - existing target directory that receives the compressed error logs
#   $2 - optional ingest log to read; standard input is read when omitted
set -e # Exit immediately on non-zero status.
if [ $# -eq 0 ]; then
  echo "Usage: $0 </Path/to/target/dir> [</Path/to/source.log>]"
  echo "Example: $0 /tmp/ingest-all/ingestX /opt/interset/analytics/logs/ingest.0.log"
  echo "Example: tail -f /opt/interset/analytics/logs/ingest.0.log | $0 /tmp/ingest-all/ingestX"
  exit 1
fi
# validate target dir
# "$1" is quoted: unquoted, a path containing spaces makes `[` fail with
# "too many arguments", which silently skips this check.
if [ ! -d "$1" ]; then
  echo "target directory: $1 doesn't exist"
  exit 1
fi
OUTPUTDIR="$1"
ROWCOUNT=0
ERRORCOUNT=0
INGESTSTART="Ingesting File: "
INGESTSTOP="Total number of records ingested for file "
# "Done processing batch of : "
INGESTSTOP2="Total number of records ingested: "
# INGESTSTOP3 is INGESTSTOP without the trailing space, so it also matches
# every line that INGESTSTOP matches.
INGESTSTOP3="Total number of records ingested for file"
INGESTERROR="Unable to process record: "
LOGCOUNTER=0
TEMPLOG="ingesterrors"
TEMPLOGDIR="/tmp"
ACTIVELOG=""
# -r keeps backslashes in log lines literal; the `|| [ -n "$line" ]` clause
# still processes a final line that lacks a trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
  if [ -z "$line" ]; then # Ignore empty lines
    continue
  elif [[ "$line" =~ "$INGESTSTART" ]]; then # Begin file ingestion
    ingestfilestart
  elif [[ "$line" =~ "$INGESTSTOP" ||
    "$line" =~ "$INGESTSTOP2" ||
    "$line" =~ "$INGESTSTOP3" ]]; then # End file ingestion
    ingestfilestop
  elif [[ "$line" =~ "$INGESTERROR" ]]; then # file ingestion error
    ingestrecorderror "$line"
    #echo "$ROWCOUNT" `date`
  fi
  # echo "$ROWCOUNT:$line"
  # Assignment form: its status never depends on the value, so `set -e`
  # cannot abort the loop here.
  ROWCOUNT=$((ROWCOUNT + 1))
done < "${2:-/dev/stdin}"
# close up any active log
echo "no more input"
ingestfilestop