#!/bin/bash
# Batch verification script with pull queue monitoring
# Processes depot files in batches and monitors replication queue
# Default configuration
# Entries tagged with an option letter are replaced when that option is given
# on the command line.
INPUT_FILE=''         # -f  file listing the depot files to verify
VERBOSE=false         # -v  when true, per-batch details are logged
MAX_BATCHES=''        # -m  empty means no limit on the number of batches
BATCH_SIZE=1000       # -b  lines of the input file handled per batch
MAX_PULL_QUEUE=10000  # -q  pull queue size above which the script sleeps
SLEEP_TIME=30         # -s  seconds slept per wait while the queue is too big

# Count of BAD/MISSING lines reported for the most recently verified batch.
num_transfers=0

# Log file for this run; the name carries the start time.
LOGFILE="batch_verify_$(date +%Y%m%d_%H%M%S).log"
# Print the command-line help on stdout and terminate the script.
# Globals: BATCH_SIZE, MAX_PULL_QUEUE, SLEEP_TIME (read; their current values
#          are shown as the defaults)
# Returns: never returns; exits with status 1
usage() {
  cat <<EOF
Usage: $0 -f <input_file> [-b batch_size] [-q max_queue] [-s sleep_time] [-m max_batches] [-v]

Options:
 -f <file> Input file containing depot files to verify (required)
 -b <size> Batch size (default: $BATCH_SIZE)
 -q <number> Maximum pull queue size before sleeping (default: $MAX_PULL_QUEUE)
 -s <seconds> Sleep time when queue is full (default: $SLEEP_TIME)
 -m <number> Maximum number of batches to process (default: unlimited)
 -v Verbose output

Example:
 $0 -f depot_files.txt -b 500 -q 5000 -s 60 -m 100 -v
EOF
  exit 1
}
# Parse command line arguments
# Options that take a value overwrite the matching setting, -v switches on
# verbose logging, and everything else (-h included) shows the help text.
while getopts "f:b:q:s:m:vh" opt; do
  case "$opt" in
    v) VERBOSE=true ;;
    m) MAX_BATCHES="$OPTARG" ;;
    s) SLEEP_TIME="$OPTARG" ;;
    q) MAX_PULL_QUEUE="$OPTARG" ;;
    b) BATCH_SIZE="$OPTARG" ;;
    f) INPUT_FILE="$OPTARG" ;;
    *) usage ;; # -h and any unrecognised option
  esac
done
# Validate required parameters
# An input file must have been named with -f; without one the help text is
# shown (usage ends the script).
[[ -n "$INPUT_FILE" ]] || {
  echo "Error: Input file is required (-f option)"
  usage
}

# The named path has to be an existing regular file.
[[ -f "$INPUT_FILE" ]] || {
  echo "Error: Input file '$INPUT_FILE' not found"
  exit 1
}
# Validate numeric parameters
#
# Each setting must be a string of decimal digits with a value of at least 1.
# The value is read explicitly as base 10 (10#...): bash arithmetic treats a
# leading zero as octal, so a plain "-lt 1" test on "08" raises an arithmetic
# error, evaluates as false and lets the value through, while "010" would
# silently mean 8. After a successful check the setting is rewritten in
# canonical decimal form so later arithmetic sees the intended number.

# Succeed when $1 is all digits and, read as decimal, is >= 1.
is_positive_integer() {
  [[ "$1" =~ ^[0-9]+$ ]] && ((10#$1 >= 1))
}

if ! is_positive_integer "$BATCH_SIZE"; then
  echo "Error: Batch size must be a positive integer"
  exit 1
fi
BATCH_SIZE=$((10#$BATCH_SIZE))

if ! is_positive_integer "$MAX_PULL_QUEUE"; then
  echo "Error: Max pull queue must be a positive integer"
  exit 1
fi
MAX_PULL_QUEUE=$((10#$MAX_PULL_QUEUE))

if ! is_positive_integer "$SLEEP_TIME"; then
  echo "Error: Sleep time must be a positive integer"
  exit 1
fi
SLEEP_TIME=$((10#$SLEEP_TIME))

# MAX_BATCHES is optional: an empty value means "unlimited" and is left alone.
if [[ -n "$MAX_BATCHES" ]]; then
  if ! is_positive_integer "$MAX_BATCHES"; then
    echo "Error: Max batches must be a positive integer"
    exit 1
  fi
  MAX_BATCHES=$((10#$MAX_BATCHES))
fi
# Report the current size of the pull queue.
#
# Runs `p4 pull -lsh` with stderr discarded and looks for a summary such as
#   "File transfers: 0 active/15780168 total, bytes: 0B active/32T total."
#
# Globals: LOGFILE (appended to)
# Outputs: the total transfer count on stdout, or "0" when no summary could
#          be parsed. The detailed status (or the parse warning) goes to
#          LOGFILE only, so stdout carries nothing but the number.
get_pull_queue_info() {
  local summary
  summary=$(p4 pull -lsh 2>/dev/null)

  # Guard clause: unparseable output is logged and reported as an empty queue.
  if ! [[ "$summary" =~ File\ transfers:\ ([0-9]+)\ active/([0-9]+)\ total,\ bytes:\ ([^\ ]+)\ active/([^\ ]+)\ total\. ]]; then
    printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: Could not parse pull queue output: $summary" >> "$LOGFILE"
    printf '%s\n' "0"
    return 0
  fi

  local xfers_active="${BASH_REMATCH[1]}"
  local xfers_total="${BASH_REMATCH[2]}"
  local bytes_active="${BASH_REMATCH[3]}"
  local bytes_total="${BASH_REMATCH[4]}"

  printf '%s\n' "[$(date '+%Y-%m-%d %H:%M:%S')] Pull queue status: $xfers_active active/$xfers_total total transfers, $bytes_active active/$bytes_total total bytes" >> "$LOGFILE"

  # The total is what the caller compares against its threshold.
  printf '%s\n' "$xfers_total"
}
# Emit one message, prefixed with the current time, on stdout and append the
# same line to LOGFILE.
# Globals:   LOGFILE (appended to)
# Arguments: $1 - message text
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOGFILE"
}
# Verify one batch of depot files taken from INPUT_FILE.
#
# Lines start_line..end_line of INPUT_FILE are copied to a temporary file
# that is handed to `p4 -x <file> verify -qt --only MISSING`. The number of
# output lines containing BAD or MISSING is stored in the global
# num_transfers.
#
# Globals:   INPUT_FILE, VERBOSE (read); num_transfers (written)
# Arguments: $1 - first line of the batch (1-based)
#            $2 - last line of the batch (inclusive)
#            $3 - batch number, used in log messages only
# Returns:   0 when the batch was run;
#            1 when the temporary file could not be created or filled;
#            p4's exit status when p4 failed without reporting a single
#            BAD/MISSING line.
verify_batch() {
  local start_line="$1"
  local end_line="$2"
  local batch_num="$3"
  local actual_lines=$((end_line - start_line + 1))
  local temp_file verify_output p4_status

  num_transfers=0

  if [[ $VERBOSE == true ]]; then
    log "Processing batch $batch_num (lines $start_line-$end_line, $actual_lines files)"
  fi

  # Assigned separately from its `local` declaration so that a mktemp failure
  # is visible instead of being replaced by the status of `local`.
  if ! temp_file=$(mktemp); then
    log "Error: could not create a temporary file for batch $batch_num"
    return 1
  fi

  # Create the batch file list using sed.
  if ! sed -n "${start_line},${end_line}p" "$INPUT_FILE" > "$temp_file"; then
    log "Error: could not extract lines $start_line-$end_line from $INPUT_FILE"
    rm -f "$temp_file"
    return 1
  fi

  if [[ $VERBOSE == true ]]; then
    log "Running: p4 -x $temp_file verify -qt --only MISSING"
  fi

  # Capture the output first: in a `p4 ... | grep -c` pipeline the status seen
  # by the caller is grep's, so a failed p4 run would go unnoticed.
  verify_output=$(p4 -x "$temp_file" verify -qt --only MISSING 2>&1)
  p4_status=$?
  rm -f "$temp_file"

  # grep -c prints 0 (and exits 1) when nothing matches; default to 0 in case
  # grep itself could not run.
  num_transfers=$(grep -cE "BAD|MISSING" <<< "$verify_output")
  num_transfers=${num_transfers:-0}

  # A non-zero p4 status together with BAD/MISSING lines is treated as a
  # normal run. A non-zero status with no such line means p4 failed for some
  # other reason, which is reported to the caller.
  if [[ $p4_status -ne 0 && $num_transfers -eq 0 ]]; then
    log "Error: p4 verify exited with status $p4_status for batch $batch_num: $verify_output"
    return "$p4_status"
  fi
  return 0
}
# Main processing: start-up banner and progress counters
log "Starting batch verification process"

# Record the effective settings; an empty MAX_BATCHES is shown as "unlimited".
case "$MAX_BATCHES" in
  "")
    log "Configuration: Batch size=$BATCH_SIZE, Max queue=$MAX_PULL_QUEUE, Sleep time=${SLEEP_TIME}s, Max batches=unlimited"
    ;;
  *)
    log "Configuration: Batch size=$BATCH_SIZE, Max queue=$MAX_PULL_QUEUE, Sleep time=${SLEEP_TIME}s, Max batches=$MAX_BATCHES"
    ;;
esac

# Line count of the input file as it is right now.
total_lines=$(wc -l < "$INPUT_FILE")
log "Input file: $INPUT_FILE ($total_lines initial files)"
log "Log file: $LOGFILE"

# Progress counters
current_line=1     # next input line to hand to a batch
total_processed=0  # lines belonging to batches that completed successfully
batch_num=0        # batches started so far
# Process the input file in batches, picking up lines appended while running.
#
# Each iteration:
#   1. stops when the -m batch limit has been reached;
#   2. if every known line has been handled, re-counts the file and polls
#      every 5 seconds until new lines have been appended;
#   3. verifies the next batch of at most BATCH_SIZE lines;
#   4. sleeps in SLEEP_TIME steps while the pull queue exceeds MAX_PULL_QUEUE.
#
# Without -m the loop does not end by itself: once the input is exhausted it
# keeps polling for appended lines until the script is interrupted.
while true; do
  # Checked before waiting for data so that the limit also ends the run when
  # no unread lines remain; checked after the wait, the script would poll
  # forever once the last permitted batch had consumed the end of the file.
  if [[ -n "$MAX_BATCHES" ]] && [[ $batch_num -ge $MAX_BATCHES ]]; then
    log "Reached maximum batch limit ($MAX_BATCHES), stopping processing"
    break
  fi

  # All known lines are consumed: look for appended content.
  if [[ $current_line -gt $total_lines ]]; then
    current_total_lines=$(wc -l < "$INPUT_FILE")
    if [[ $current_total_lines -le $total_lines ]]; then
      # No new lines, wait a bit and check again
      sleep 5
      continue
    fi
    log "File grew from $total_lines to $current_total_lines lines"
    total_lines=$current_total_lines
  fi

  # Last line of this batch, clamped to the known size of the file.
  end_line=$((current_line + BATCH_SIZE - 1))
  if [[ $end_line -gt $total_lines ]]; then
    end_line=$total_lines
  fi

  # Plain assignment: ((batch_num++)) yields status 1 when the old value is 0.
  batch_num=$((batch_num + 1))

  # Verify the current batch; verify_batch sets num_transfers.
  if verify_batch "$current_line" "$end_line" "$batch_num"; then
    batch_size=$((end_line - current_line + 1))
    total_processed=$((total_processed + batch_size))
    log "Batch $batch_num completed successfully ($num_transfers transfers, $total_processed total processed)"
  else
    log "Warning: Batch $batch_num had verification errors"
  fi

  # Check pull queue size and sleep while over threshold
  queue_size=$(get_pull_queue_info)
  while [[ $queue_size -gt $MAX_PULL_QUEUE ]]; do
    log "Pull queue size ($queue_size) exceeds threshold ($MAX_PULL_QUEUE), sleeping for ${SLEEP_TIME}s"
    sleep "$SLEEP_TIME"
    queue_size=$(get_pull_queue_info)
  done

  # Move to next batch (a failed batch is skipped, not retried)
  current_line=$((end_line + 1))
done

# Final summary
log "Batch verification process completed"
log "Total batches processed: $batch_num"
log "Total files processed: $total_processed"
| # | Change | User | Description | Committed | |
|---|---|---|---|---|---|
| #1 | 32181 | Russell C. Jackson (Rusty) | New files to do a batch verify without overloading the pull queue. Uses output of p4verify_notransfer.bat | | |