#!/usr/local/bin/php
<?php


/**
 *
 * Copyright (c) 2008, Perforce Software, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 *  T R A C K 2 S Q L . P H P 
 * ---------------------------
 * 
 *  Track2sql takes a VTRACK log file (server version 2005.2 or
 *  greater) as input, and produces an SQL file as output. It 
 *  requires a PHP command-line interpreter and an SQL database.
 *  It has been tested with PHP version 5, but might be compatible
 *  with earlier versions. It has been tested with MySQL version 5
 *  but should be compatible with other SQL databases.
 *  
 *  See also: http://public.perforce.com/guest/stewart_lord/track2sql/README
 *
 *
 *  U S A G E
 * -----------
 *  
 *   php track2sql.php [ logFile | - [ sqlFile | - ] ] [ -d dbName ]
 *  
 *    Options:
 *      logFile     name of input file or '-' for stdin
 *      sqlFile     name of output file or '-' for stdout
 *      -d          name of database to create 
 *      -v|V|h      view version and usage information
 *     
 *   Examples:
 *     $ track2sql.php log track.sql -d track
 *     $ cat log | track2sql.php | mysql
 *     $ tail -F log | track2sql.php | mysql
 *
 *
 *  O U T P U T
 * -------------
 *
 * Produces an SQL file that will create and populate the following
 * tables:
 *
 *  +------------+---------------+------+-----+
 *  | P R O C E S S                           |
 *  +------------+---------------+------+-----+
 *  | Field      | Type          | Null | Key |
 *  +------------+---------------+------+-----+
 *  | processKey | varchar(36)   | NO   | PRI |
 *  | time       | int(11)       | NO   |     |
 *  | pid        | int(11)       | NO   |     |
 *  | user       | varchar(255)  | NO   |     |
 *  | client     | varchar(255)  | NO   |     |
 *  | ip         | varchar(255)  | NO   |     |
 *  | app        | varchar(255)  | NO   |     |
 *  | cmd        | varchar(255)  | NO   |     |
 *  | args       | text          | YES  |     |
 *  | lapse      | decimal(10,3) | YES  |     |
 *  | uCpu       | int(11)       | YES  |     |
 *  | sCpu       | int(11)       | YES  |     |
 *  | diskIn     | int(11)       | YES  |     |
 *  | diskOut    | int(11)       | YES  |     |
 *  | ipcIn      | int(11)       | YES  |     |
 *  | ipcOut     | int(11)       | YES  |     |
 *  | maxRss     | int(11)       | YES  |     |
 *  | pageFaults | int(11)       | YES  |     |
 *  | rpcMsgsIn  | int(11)       | YES  |     |
 *  | rpcMsgsOut | int(11)       | YES  |     |
 *  | rpcSizeIn  | int(11)       | YES  |     |
 *  | rpcSizeOut | int(11)       | YES  |     |
 *  +------------+---------------+------+-----+
 * 
 *  +-------------+--------------+------+-----+
 *  | T A B L E   U S E                       |
 *  +-------------+--------------+------+-----+
 *  | Field       | Type         | Null | Key |
 *  +-------------+--------------+------+-----+
 *  | processKey  | varchar(36)  | NO   | PRI |
 *  | tableName   | varchar(255) | NO   | PRI |
 *  | pagesIn     | int(11)      | YES  |     |
 *  | pagesOut    | int(11)      | YES  |     |
 *  | pagesCached | int(11)      | YES  |     |
 *  | readLocks   | int(11)      | YES  |     |
 *  | writeLocks  | int(11)      | YES  |     |
 *  | getRows     | int(11)      | YES  |     |
 *  | posRows     | int(11)      | YES  |     |
 *  | scanRows    | int(11)      | YES  |     |
 *  | putRows     | int(11)      | YES  |     |
 *  | delRows     | int(11)      | YES  |     |
 *  | readWait    | int(11)      | YES  |     |
 *  | readHeld    | int(11)      | YES  |     |
 *  | writeWait   | int(11)      | YES  |     |
 *  | writeHeld   | int(11)      | YES  |     |
 *  +-------------+--------------+------+-----+
 *
 */


// run.
main( $argv, $argc );


/**
 * The main function. 
 * loops over input file - writes to output file.
 *
 * @param   array   argv   the options given to dictate behavior.
 * @param   int     argc   the number of arguments.
 */
function main( $argv, $argc )
{

    // relax error reporting to suppress notices.
    error_reporting( E_ALL & ~E_NOTICE );

    // set default timezone to silence warnings when using date/time funcs.
    date_default_timezone_set(@date_default_timezone_get());
    
    // output usage and version information if so instructed.
    // consider -v, -V, and -h to be a request for this info.
    if ( $argv[1] == "-v" || $argv[1] == "-V" || $argv[1] == "-h" )
    {
        $help  = "\nTRACK2SQL revision " . '$Change: 7621 $';
        $help .= "\n" . str_repeat( "-", strlen( $help ) ) . "\n\n";
        $help .= "Track2sql takes a VTRACK log file (server version 2005.2 \n" .
         "or greater) as input, and produces an SQL file as output.\n" .
         "For more information, such as the schema and example queries,\n" .
         "visit: \n\n   " .
         "http://public.perforce.com/guest/stewart_lord/track2sql/README\n\n" .
         "Usage: \n   " . 
         "php track2sql.php [ logFile | - [ sqlFile | - ] ] [ -d dbName ]\n\n" .
         "   Options: \n" .
         "      logFile     name of input file or '-' for stdin \n" .
         "      sqlFile     name of output file or '-' for stdout \n" .
         "      -d          name of database to create \n" .
         "      -v|V|h      view version and usage information \n\n" .    
         "   Examples: \n" .
         "      $ track2sql.php log track.sql -d track \n" .
         "      $ cat log | track2sql.php | mysql \n" .
         "      $ tail -F log | track2sql.php | mysql \n\n";
        echo $help;
        exit;
    }

    // parse args:
    //  -> if arg looks like '-d', then take next piece for db name.
    //  -> take first (non -d) arg to be inputFile.
    //  -> take second (non -d) arg to be outputFile.
    for ( $i = 1; $i < $argc; $i++ )
    {
        if ( substr( $argv[ $i ], 0, 2 ) == "-d" )
        {
            if ( strlen( $argv[ $i ] ) == 2 )
                $dbName = $argv[ ++$i ];
            else
                $dbName = substr( $argv[ $i ], 2 );
        }
        else if ( !$inputFile )
        {
            $inputFile = $argv[ $i ];
        }
        else if ( !$outputFile )
        {
            $outputFile = $argv[ $i ];
        }
    }
    
    // if input file argument given, check validity of file.
    if ( $inputFile && $inputFile != "-" && !is_file( $inputFile ) )
        die( "Error: The specified input file does not exist.\n" );
    if ( $inputFile && $inputFile != "-" && !filesize( $inputFile ) )
        die( "Error: The input file is empty.\n" );

    // input and output file arguments are optional.
    //  -> if no input file given, then use stdin.
    //  -> if no output file given, then use stdin.
    //  -> if input or output file is '-', then use stdin/out.
    if ( !$inputFile  || $inputFile  == "-" ) $inputFile  = "php://stdin";
    if ( !$outputFile || $outputFile == "-" ) $outputFile = "php://stdout";
    
    // open the files and test handles.
    $in  = fopen( $inputFile,  'r' );
    $out = fopen( $outputFile, 'w' );
    if ( !is_resource( $in ) )
        die( "Error: Unable to open input stream.\n" );
    if ( !is_resource( $out ) )
        die( "Error: Unable to open output stream.\n" );
        
    // if dbName given, write out the create/use db statements.
    if ( trim( $dbName ) )
    {
        fwrite( $out,
            "CREATE DATABASE IF NOT EXISTS " . $dbName . "; \n" .
            "USE " . $dbName . "; \n" 
        );
    }

    // write out the create table statements.
    fwrite( $out, getCreateStatements() );

    // start reading lines - recognize 'info' blocks, buffer and parse them.
    while ( $line = fgets( $in ) )
    {

        // if not inside info block, look for start of block.
        if ( !$inInfo )
        {
            if ( isInfoStart( $line ) )
                $inInfo = TRUE;
        }
        
        // if inside info block, buffer and look for end of block.
        else if ( $inInfo )
        {
            if ( isInfoEnd( $line ) )
            {
                $sql = parseInfo( $buffer );
                if ( $sql ) fwrite( $out, $sql );
                if ( !isInfoStart( $line ) )
                    $inInfo = FALSE;            
                $buffer = NULL;
            } else {
                $buffer .= $line;
            }    
        }
        
    }
    
    // clean-up.
    fclose( $in );
    fclose( $out );

}


/**
 * Quick test for the beginning of an info block.
 *
 * @param   string  line    the line to test.
 * @return  bool    the result of the test.
 */
function isInfoStart( $line )
{
    if ( ( $line === "Perforce server info:\n" ) or
         ( $line === "Perforce server info:\r\n" ) )
        return TRUE;
}


/**
 * Quick test for the end of an info block.
 *
 * @param   string  line    the line to test.
 * @return  bool    the result of the test.
 */
function isInfoEnd( $line )
{
    if ( !trim( $line ) || isInfoStart( $line ) )
        return TRUE;
}


/**
 * Extract track information from info block.
 *
 * @param   string  info    the info block to parse.
 */
function parseInfo( $info )
{

    // early exit if not track output.
    if ( !isTrackOutput( $info ) ) return;

    // manufacture a key for joining process
    // and table records.
    $key = quote( makeProcessKey() );

    // build the process record.
    $processInfo  =  extractProcessInfo( $info );
    $sql          =   
        "INSERT INTO process " .
        "VALUES ( " . $key . "," . implode( ",", $processInfo ) . " );\n";
        
    // build the table records.
    $tablesInfo   =  extractTablesInfo( $info );
    if ( !is_array( $tablesInfo ) ) return $sql;
    foreach ( $tablesInfo as $tableInfo )
    {
        $sql .= 
            "INSERT INTO tableUse " .
            "VALUES ( " . $key . "," . implode( ",", $tableInfo ) . ");\n";
    }
 
    // return the sql.
    return $sql;
    
}


/**
 * Test if the given string looks like vtrack output
 * with a complete process information line.
 * 
 * @param   string  info    the info block to test.
 * @return  bool    true if the info block appears to be usable track output.
 */
function isTrackOutput( $info )
{
    return ( strpos( $info, "\n--- " ) !== FALSE &&
        strpos( substr( $info, 0, strpos( $info, "\n" ) ), "@" ) !== FALSE );
}


/**
 * Make a unique key (UUID) for joining processes and table use records.
 *
 * @return  string  a UUID to use as a process key.
 */
function makeProcessKey()
{
    $chars = md5( uniqid( mt_rand(), TRUE ) );
    $uuid  = substr( $chars,  0,  8 ) . '-';
    $uuid .= substr( $chars,  8,  4 ) . '-';
    $uuid .= substr( $chars, 12,  4 ) . '-';
    $uuid .= substr( $chars, 16,  4 ) . '-';
    $uuid .= substr( $chars, 20, 12 );
    return $uuid;
}


/**
 * Take a vtrack block, and extract process information.
 * Resulting array contains 21 elements (quoted where appropriate).
 *
 * @param   string  track   the vtrack output to parse.
 * @return  array   the exploded process information.
 */
function extractProcessInfo( $track )
{

    // break into lines.
    $lines = explode( "\n", $track );

    // pull apart the first line.
    $parts = explode( " ", trim( $lines[0] ), 7 );
    list( $user, $client ) = explode( "@", $parts[4], 2 );
    list( $app, $command ) = explode( "] '", $parts[6], 2 );
    list( $cmd, $args )    = explode( " ", $command, 2 ); 
    $process[] = strtotime( $parts[0] . " " . $parts[1] );
    $process[] = intval( $parts[3] );
    $process[] = quote( trim( $user ) );
    $process[] = quote( trim( $client ) );
    $process[] = quote( trim( $parts[5] ) );
    $process[] = quote( substr( $app, 1 ) );
    if ( $args )
    {
        $process[] = quote( $cmd );
        $process[] = quote( substr( $args, 0, -1 ) );
    } else {
        $process[] = quote( substr( $cmd, 0, -1 ) );
	$process[] = "NULL";
    }

    // search the next three lines for lapse, usage and rpc.
    //  -> only first could be lapse.
    //  -> first or second could be usage.
    //  -> anyone could be rpc.
    for ( $i = 1; ( $i < 4 ) && ( $i < count( $lines ) ); $i++ )
    {
        if ( $i == 1 && substr( $lines[ $i ], 0, 10 ) == "--- lapse " )
            $lapseTime = trim( $lines[ $i ] );
        else if ( $i < 3 && substr( $lines[ $i ], 0, 10 ) == "--- usage " )
            $usageStats = trim( $lines[ $i ] );
        else if ( substr( $lines[ $i ], 0, 8 ) == "--- rpc " )
            $rpcStats = trim( $lines[ $i ] );
    }
    
    // insert lapse time into process info array - or pad with 'NULL'.
    if ( $lapseTime )
    {
        $process[] = floatval( substr( $lapseTime, 10, -1 ) );
    } else {
        $process[] = "NULL";
    }

    // insert usage stats into process info array - or pad with 'NULL'.
    if ( $usageStats )
    {
        $parts = preg_split( "/ |\+/", $usageStats );
        for ( $i = 2; $i < 10; $i++ )
            $process[] = intval( $parts[ $i ] );
    } else {
        for ( $i = 0; $i < 8; $i++ )
            $process[] = "NULL";
    }    

    // insert rcp stats into process info array - or pad with 'NULL'.
    if ( $rpcStats )
    {
        $parts = preg_split( "/ |\+|\//", $rpcStats );
        for ( $i = 6; $i < 10; $i++ )
            $process[] = intval( $parts[ $i ] );
    } else {
        for ( $i = 0; $i < 4; $i++ )
            $process[] = "NULL";
    }    

    // return process info array.
    return $process;

}


/**
 * Take a vtrack block, and extract tables usage information.
 *
 * @param   string  track   the vtrack output to parse.
 * @return  array   the exploded tables information.
 */
function extractTablesInfo( $track )
{

    // break apart track output on tables.
    $blocks = explode( "--- db.", $track );
    for ( $i = 1; $i < count( $blocks ); $i++ )
    {
        $tables[] = extractTableInfo( $blocks[ $i ] );
    }
    return $tables;

}


/**
 * Take a snippet of vtrack, and extract single table usage information.
 * Returns an array containing 15 elements.
 *
 * @param   string  track   the vtrack output to parse.
 * @return  array   the exploded table information.
 */
function extractTableInfo( $track )
{

    // break into lines.
    $lines = explode( "\n", $track );

    // first line is table name.
    $table[] = "'" . trim( $lines[0] ) . "'";

    // search the next four lines for pages, rows and locks.
    //  -> only first could be pages.
    //  -> first, second or third could be rows.
    //  -> anyone could be locks.
    for ( $i = 1; ( $i < 5 ) && ( $i < count( $lines ) ); $i++ )
    {
        if ( $i == 1 && substr( $lines[ $i ], 0, 14 ) == "---   pages in" )
            $pageStats = trim( $lines[ $i ] );
        else if ( $i < 4 && substr( $lines[ $i ], 0, 12 ) == "---   locks " &&
            substr( $lines[ $i ], 0, 16 ) != "---   locks wait" )
            $rowStats = trim( $lines[ $i ] );
        else if ( substr( $lines[ $i ], 0, 16 ) == "---   locks wait" )
            $lockStats = substr( $lines[ $i ], 33 );
        else if ( substr( $lines[ $i ], 0, 16 ) == "---   total lock" )
            $lockStats = substr( $lines[ $i ], 38 );
    }

    // insert page stats into table info array - or pad with 'NULL'.
    if ( $pageStats )
    {
        $parts = preg_split( "/[ |\+]+/", $pageStats );
        for ( $i = 5; $i < 8; $i++ )
            $table[] = intval( $parts[ $i ] );
    } else {
        for ( $i = 0; $i < 3; $i++ )
            $table[] = "NULL";
    }    

    // insert row stats into table info array - or pad with 'NULL'.
    if ( $rowStats )
    {    
        
        // switch for server version.
        //  -> 14 parts in 2005.2 track output.
        //     *read/write locks aren't differentiated.
        //  -> 17 parts in 2006.1+ track output.
        //     *read/write locks are broken out.
        $parts = preg_split( "/[ |\/|\+]+/", $rowStats );
        if ( count( $parts ) == 14 )
        {
            $table[] = intval( $parts[2] );
            $table[] = intval( $parts[2] );
            for ( $i = 9; $i < 14; $i++ )
                $table[] = intval( $parts[ $i ] );
        } else {
            $table[] = intval( $parts[4] );
            $table[] = intval( $parts[5] );
            for ( $i = 12; $i < 17; $i++ )
                $table[] = intval( $parts[ $i ] );
        }
        
    } else {
        for ( $i = 0; $i < 7; $i++ )
            $table[] = "NULL";
    }    

    // insert lock stats into table info array - or pad with 'NULL'.
    if ( $lockStats )
    {
        $parts = preg_split( "/[ |\/|\+]+/", $lockStats );
        for ( $i = 0; $i < 4; $i++ )
            $table[] = intval( $parts[ $i ] );
    } else {
        for ( $i = 0; $i < 4; $i++ )
            $table[] = "NULL";
    }    

    // return table info array.
    return $table;

}


/**
 * Create process and tableUse tables.
 *
 * @return  string  the sql to create the tables.
 */
function getCreateStatements()
{

    // create syntax for process table.
    $sql  =
        "CREATE TABLE IF NOT EXISTS process ( \n" .
        " processKey varchar(36) NOT NULL, \n" .
        "       time int NOT NULL, \n" .
        "        pid int NOT NULL, \n" . 
        "       user varchar(255) NOT NULL, \n" . 
        "     client varchar(255) NOT NULL, \n" . 
        "         ip varchar(255) NOT NULL, \n" . 
        "        app varchar(255) NOT NULL, \n" . 
        "        cmd varchar(255) NOT NULL, \n" . 
        "       args text NULL, \n" .
        "      lapse decimal(10,3) NULL, \n" .
        "       uCpu int NULL, \n" .
        "       sCpu int NULL, \n" .
        "     diskIn int NULL, \n" .
        "    diskOut int NULL, \n" .
        "      ipcIn int NULL, \n" .
        "     ipcOut int NULL, \n" .
        "     maxRss int NULL, \n" .
        " pageFaults int NULL, \n" .
        "  rpcMsgsIn int NULL, \n" .
        " rpcMsgsOut int NULL, \n" .
        "  rpcSizeIn int NULL, \n" .
        " rpcSizeOut int NULL, \n" .
        " PRIMARY KEY ( processKey ) \n" .
        "); \n";
        
    // create syntax for tableUse table.
    $sql .=
        "CREATE TABLE IF NOT EXISTS tableUse ( \n" .
        "  processKey varchar(36) NOT NULL, \n" .
        "   tableName varchar(255) NOT NULL, \n" . 
        "     pagesIn int NULL, \n" .
        "    pagesOut int NULL, \n" .
        " pagesCached int NULL, \n" .
        "   readLocks int NULL, \n" .
        "  writeLocks int NULL, \n" .
        "     getRows int NULL, \n" .
        "     posRows int NULL, \n" .
        "    scanRows int NULL, \n" .
        "     putRows int NULL, \n" .
        "     delRows int NULL, \n" .
        "    readWait int NULL, \n" .
        "    readHeld int NULL, \n" .
        "   writeWait int NULL, \n" .
        "   writeHeld int NULL, \n" .
        " PRIMARY KEY ( processKey, tableName ) \n" .
        "); \n";        
    
    return $sql;
    
}


/**
 * Quote a string for use as a SQL value. Wraps string in 
 * single quotes and escapes embedded single quotes with a quote.
 *
 * @param   string  value   the string to quote.
 * @return  string  the quoted and escaped string.
 */
function quote( $value )
{
    return "'" . str_replace( "'", "''", $value ) . "'";
}


?>