batch_verify.sh #1

  • //
  • guest/
  • russell_jackson/
  • sdp/
  • Server/
  • Unix/
  • p4/
  • common/
  • bin/
  • batch_verify.sh
  • View
  • Commits
  • Open Download .zip Download (7 KB)
#!/bin/bash
 
# Batch verification script with pull queue monitoring
# Processes depot files in batches and monitors replication queue
 
# Default configuration (each tunable may be overridden by a command-line flag)

# Required input and optional cap; both start out empty.
INPUT_FILE=""
MAX_BATCHES=""

# Throttling knobs: lines per batch, queue threshold, seconds to back off.
BATCH_SIZE=1000
MAX_PULL_QUEUE=10000
SLEEP_TIME=30

VERBOSE=false

# Count reported by the most recent batch verification.
num_transfers=0

# Log file name carries the script start time.
printf -v LOGFILE 'batch_verify_%s.log' "$(date '+%Y%m%d_%H%M%S')"
 
# Usage function
# Writes the help text to stdout (showing the current values of BATCH_SIZE,
# MAX_PULL_QUEUE and SLEEP_TIME as defaults) and exits with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 -f <input_file> [-b batch_size] [-q max_queue] [-s sleep_time] [-m max_batches] [-v]" \
        "" \
        "Options:" \
        "  -f <file>     Input file containing depot files to verify (required)" \
        "  -b <size>     Batch size (default: $BATCH_SIZE)" \
        "  -q <number>   Maximum pull queue size before sleeping (default: $MAX_PULL_QUEUE)" \
        "  -s <seconds>  Sleep time when queue is full (default: $SLEEP_TIME)" \
        "  -m <number>   Maximum number of batches to process (default: unlimited)" \
        "  -v            Verbose output" \
        "" \
        "Example:" \
        "  $0 -f depot_files.txt -b 500 -q 5000 -s 60 -m 100 -v"
    exit 1
}
 
# Parse command line arguments
# Every recognised flag overwrites the matching default. -h, and anything
# getopts rejects, falls through to usage, which exits the script.
while getopts "f:b:q:s:m:vh" opt; do
    case "$opt" in
        v) VERBOSE=true ;;
        f) INPUT_FILE="$OPTARG" ;;
        m) MAX_BATCHES="$OPTARG" ;;
        b) BATCH_SIZE="$OPTARG" ;;
        q) MAX_PULL_QUEUE="$OPTARG" ;;
        s) SLEEP_TIME="$OPTARG" ;;
        h | *) usage ;;
    esac
done
 
# Validate required parameters
if [[ -z "$INPUT_FILE" ]]; then
    echo "Error: Input file is required (-f option)"
    usage
fi

if [[ ! -f "$INPUT_FILE" ]]; then
    echo "Error: Input file '$INPUT_FILE' not found"
    exit 1
fi

# Validate numeric parameters
#
# Each value must consist only of decimal digits and be at least 1.
# The comparison and the stored value both go through "10#" because bash
# arithmetic treats a leading 0 as octal: without it "010" would silently
# mean 8, and "08"/"09" would raise an arithmetic error here and wherever
# the value is used in $(( )) afterwards.
if ! [[ "$BATCH_SIZE" =~ ^[0-9]+$ ]] || (( 10#$BATCH_SIZE < 1 )); then
    echo "Error: Batch size must be a positive integer"
    exit 1
fi
BATCH_SIZE=$((10#$BATCH_SIZE))

if ! [[ "$MAX_PULL_QUEUE" =~ ^[0-9]+$ ]] || (( 10#$MAX_PULL_QUEUE < 1 )); then
    echo "Error: Max pull queue must be a positive integer"
    exit 1
fi
MAX_PULL_QUEUE=$((10#$MAX_PULL_QUEUE))

if ! [[ "$SLEEP_TIME" =~ ^[0-9]+$ ]] || (( 10#$SLEEP_TIME < 1 )); then
    echo "Error: Sleep time must be a positive integer"
    exit 1
fi
SLEEP_TIME=$((10#$SLEEP_TIME))

# MAX_BATCHES is optional: empty means "unlimited" and is left untouched.
if [[ -n "$MAX_BATCHES" ]]; then
    if ! [[ "$MAX_BATCHES" =~ ^[0-9]+$ ]] || (( 10#$MAX_BATCHES < 1 )); then
        echo "Error: Max batches must be a positive integer"
        exit 1
    fi
    MAX_BATCHES=$((10#$MAX_BATCHES))
fi
 
# Function to get current pull queue size and details
# Runs "p4 pull -lsh", appends a timestamped status (or warning) line to
# $LOGFILE, and prints the total transfer count on stdout for the caller to
# capture. Prints 0 when the output does not match the expected summary line.
get_pull_queue_info() {
    # Expected shape: "File transfers: 0 active/15780168 total, bytes: 0B active/32T total."
    local summary_re='File transfers: ([0-9]+) active/([0-9]+) total, bytes: ([^ ]+) active/([^ ]+) total\.'
    local raw
    raw=$(p4 pull -lsh 2>/dev/null)

    if ! [[ "$raw" =~ $summary_re ]]; then
        # Log goes to the file only; stdout is reserved for the numeric result.
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: Could not parse pull queue output: $raw" >> "$LOGFILE"
        echo "0"
        return
    fi

    local xfers_active="${BASH_REMATCH[1]}"
    local xfers_total="${BASH_REMATCH[2]}"
    local bytes_active="${BASH_REMATCH[3]}"
    local bytes_total="${BASH_REMATCH[4]}"

    # Log goes to the file only; stdout is reserved for the numeric result.
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Pull queue status: $xfers_active active/$xfers_total total transfers, $bytes_active active/$bytes_total total bytes" >> "$LOGFILE"

    # The total (not the active count) is what the caller compares to its threshold.
    echo "$xfers_total"
}
 
# Function to log with timestamp
# Prefixes $1 with "[YYYY-MM-DD HH:MM:SS] ", prints it to stdout and appends
# the same line to $LOGFILE.
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOGFILE"
}
 
# Function to verify batch of files using sed to extract lines
#
# Copies lines start_line..end_line of $INPUT_FILE into a temporary file and
# feeds that file to "p4 -x <file> verify -qt --only MISSING".
#
# Arguments: $1 - first line of the batch (1-based)
#            $2 - last line of the batch (inclusive)
#            $3 - batch number, used only in log messages
# Globals:   INPUT_FILE, VERBOSE (read); num_transfers (written): number of
#            p4 output lines containing BAD or MISSING
# Returns:   0 when the batch was run and counted; non-zero when the temp
#            file could not be created or filled, or when grep itself failed.
#            p4's own exit status is not consulted (its output is piped into
#            grep), so BAD/MISSING lines alone never make this function fail.
verify_batch() {
    local start_line="$1"
    local end_line="$2"
    local batch_num="$3"
    local actual_lines=$((end_line - start_line + 1))
    local temp_file
    local count_status

    num_transfers=0

    if [[ $VERBOSE == true ]]; then
        log "Processing batch $batch_num (lines $start_line-$end_line, $actual_lines files)"
    fi

    # Create temporary file for this batch using sed.
    # Declaration and assignment are separate so a mktemp failure is visible.
    if ! temp_file=$(mktemp); then
        log "Error: could not create temporary file for batch $batch_num"
        return 1
    fi
    if ! sed -n "${start_line},${end_line}p" "$INPUT_FILE" > "$temp_file"; then
        log "Error: could not extract lines $start_line-$end_line from $INPUT_FILE"
        rm -f "$temp_file"
        return 1
    fi

    # Run p4 verify with the batch; verbose mode only adds the log line.
    if [[ $VERBOSE == true ]]; then
        log "Running: p4 -x $temp_file verify -qt --only MISSING"
    fi
    num_transfers=$(p4 -x "$temp_file" verify -qt --only MISSING 2>&1 | grep -cE "BAD|MISSING")
    count_status=$?   # exit status of grep, the last stage of the pipeline

    # Clean up temp file
    rm -f "$temp_file"

    # grep -c exits 1 when the count is zero; that is a normal result here.
    # Only a status above 1 means grep could not do its job.
    if (( count_status > 1 )); then
        num_transfers=${num_transfers:-0}
        return "$count_status"
    fi

    return 0
}
 
# Main processing loop
#
# Walks INPUT_FILE in slices of BATCH_SIZE lines, running verify_batch on each
# slice and then sleeping while the pull queue is above MAX_PULL_QUEUE. When
# every known line has been handled the file is re-counted every 5 seconds so
# that lines appended in the meantime are picked up.
#
# Stops when MAX_BATCHES batches have run, or, if the optional environment
# variable MAX_IDLE_CHECKS is set to a non-negative integer, after that many
# consecutive 5-second waits without the file growing (0 = stop as soon as the
# end of the file is reached). With MAX_IDLE_CHECKS unset the loop keeps
# waiting for new lines indefinitely.
log "Starting batch verification process"
log "Configuration: Batch size=$BATCH_SIZE, Max queue=$MAX_PULL_QUEUE, Sleep time=${SLEEP_TIME}s, Max batches=${MAX_BATCHES:-unlimited}"

# Get initial number of lines in input file
total_lines=$(wc -l < "$INPUT_FILE")
log "Input file: $INPUT_FILE ($total_lines initial files)"
log "Log file: $LOGFILE"

batch_num=0
total_processed=0
current_line=1
idle_checks=0

# Process file in batches using sed, checking for new lines being appended
while true; do
    # Exit if we've reached the maximum number of batches. This is checked
    # before waiting for more input so that a limit reached exactly at the
    # end of the file stops the script instead of polling forever.
    if [[ -n "$MAX_BATCHES" ]] && [[ $batch_num -ge $MAX_BATCHES ]]; then
        log "Reached maximum batch limit ($MAX_BATCHES), stopping processing"
        break
    fi

    # All known lines are done: re-count the file to look for new content.
    if [[ $current_line -gt $total_lines ]]; then
        current_total_lines=$(wc -l < "$INPUT_FILE")

        if [[ $current_total_lines -gt $total_lines ]]; then
            log "File grew from $total_lines to $current_total_lines lines"
            total_lines=$current_total_lines
            idle_checks=0
        else
            # No new lines. Give up only if the caller opted in to a limit.
            if [[ "${MAX_IDLE_CHECKS:-}" =~ ^[0-9]+$ ]] && (( idle_checks >= 10#$MAX_IDLE_CHECKS )); then
                log "Input file has not grown after $idle_checks checks, stopping processing"
                break
            fi
            idle_checks=$((idle_checks + 1))
            # Wait a bit and check again
            sleep 5
            continue
        fi
    fi

    # Calculate end line for this batch, clamped to the known file size
    end_line=$((current_line + BATCH_SIZE - 1))
    if [[ $end_line -gt $total_lines ]]; then
        end_line=$total_lines
    fi

    ((batch_num++))

    # Verify the current batch - sets num_transfers
    if verify_batch "$current_line" "$end_line" "$batch_num"; then
        batch_size=$((end_line - current_line + 1))
        total_processed=$((total_processed + batch_size))
        log "Batch $batch_num completed successfully ($num_transfers transfers, $total_processed total processed)"
    else
        log "Warning: Batch $batch_num had verification errors"
    fi

    # Check pull queue size and sleep while over threshold
    queue_size=$(get_pull_queue_info)

    while [[ $queue_size -gt $MAX_PULL_QUEUE ]]; do
        log "Pull queue size ($queue_size) exceeds threshold ($MAX_PULL_QUEUE), sleeping for ${SLEEP_TIME}s"
        sleep "$SLEEP_TIME"
        queue_size=$(get_pull_queue_info)
    done

    # Move to next batch (a failed batch is not retried)
    current_line=$((end_line + 1))
done

log "Batch verification process completed"
log "Total batches processed: $batch_num"
log "Total files processed: $total_processed"
 
# Change User Description Committed
#1 32181 Russell C. Jackson (Rusty) New files to do a batch verify without overloading the pull queue.
# Uses output of p4verify_notransfer.bat