log2sql.py #1

  • //
  • guest/
  • perforce_software/
  • utils/
  • log_analyzer/
  • log2sql.py
  • View
  • Commits
  • Open Download .zip Download (35 KB)
#!/usr/bin/env python3
# -*- encoding: UTF8 -*-

##############################################################################
#
# Copyright (c) 2008,2016 Perforce Software, Inc.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# = Description
#
#   Output SQL queries generated by parsing commands logged in a Perforce
#   server log file.
#   The output can be imported directly in any SQL database.
#
#   log2sql populates the following tables:
#
#   +----------------+----------+------+-----+
#   |                 process                |
#   +----------------+----------+------+-----+
#   | Field          | Type     | Null | Key |
#   +----------------+----------+------+-----+
#   | processKey     | char(32) | NO   | PRI |
#   | lineNumber     | int      | NO   | PRI |
#   | startTime      | date     | NO   |     |
#   | endTime        | date     | YES  |     |
#   | computedLapse  | float    | YES  |     |
#   | completedLapse | float    | YES  |     |
#   | pid            | int      | NO   |     |
#   | user           | text     | NO   |     |
#   | workspace      | text     | NO   |     |
#   | ip             | text     | NO   |     |
#   | app            | text     | NO   |     |
#   | cmd            | text     | NO   |     |
#   | args           | text     | YES  |     |
#   | uCpu           | int      | YES  |     |
#   | sCpu           | int      | YES  |     |
#   | diskIn         | int      | YES  |     |
#   | diskOut        | int      | YES  |     |
#   | ipcIn          | int      | YES  |     |
#   | ipcOut         | int      | YES  |     |
#   | maxRss         | int      | YES  |     |
#   | pageFaults     | int      | YES  |     |
#   | rpcMsgsIn      | int      | YES  |     |
#   | rpcMsgsOut     | int      | YES  |     |
#   | rpcSizeIn      | int      | YES  |     |
#   | rpcSizeOut     | int      | YES  |     |
#   | rpcHimarkFwd   | int      | YES  |     |
#   | rpcHimarkRev   | int      | YES  |     |
#   | rpcSnd         | float    | YES  |     |
#   | rpcRcv         | float    | YES  |     |
#   | running        | int      | NO   |     |
#   | error          | text     | YES  |     |
#   +----------------+----------+------+-----+
#
#   +----------------+--------------+------+-----+
#   |                  tableUse                  |
#   +----------------+--------------+------+-----+
#   | Field          | Type         | Null | Key |
#   +----------------+--------------+------+-----+
#   | processKey     | char(32)     | NO   | PRI |
#   | lineNumber     | int          | NO   | PRI |
#   | tableName      | varchar(255) | NO   | PRI |
#   | pagesIn        | int          | YES  |     |
#   | pagesOut       | int          | YES  |     |
#   | pagesCached    | int          | YES  |     |
#   | readLocks      | int          | YES  |     |
#   | writeLocks     | int          | YES  |     |
#   | getRows        | int          | YES  |     |
#   | posRows        | int          | YES  |     |
#   | scanRows       | int          | YES  |     |
#   | putRows        | int          | YES  |     |
#   | delRows        | int          | YES  |     |
#   | totalReadWait  | int          | YES  |     |
#   | totalReadHeld  | int          | YES  |     |
#   | totalWriteWait | int          | YES  |     |
#   | totalWriteHeld | int          | YES  |     |
#   | maxReadWait    | int          | YES  |     |
#   | maxReadHeld    | int          | YES  |     |
#   | maxWriteWait   | int          | YES  |     |
#   | maxWriteHeld   | int          | YES  |     |
#   +----------------+--------------+------+-----+
#
# = Usage
#
#   log2sql.py -d <database_name> log_file
#
# = Requirements
#
#   Python: https://www.python.org
#
##############################################################################

from __future__ import print_function

import re
import sys
import os
import math
import argparse
import datetime
import hashlib
import sqlite3

python3 = sys.version_info[0] >= 3  # True under Python 3 - selects dict-iteration and str-encoding APIs below

RowsPerTransaction = 1000   # Commit every so many rows if in SQL mode

def escapeChar(s):
    """Escape backslashes and double quotes in *s* for embedding in a
    double-quoted SQL string literal.

    (Parameter renamed from ``str`` - it shadowed the builtin.)
    """
    s = s.replace("\\", "\\\\")
    s = s.replace("\"", "\\\"")
    return s

def parseDateTime(s):
    """Convert a log timestamp 'YYYY/MM/DD hh:mm:ss' into a double-quoted
    SQL datetime literal '"YYYY-MM-DD hh:mm:ss"'.

    Deliberately uses str.replace rather than strptime - this is a hot path
    and the replace is much faster.
    """
    return "\"%s\"" % s.replace("/", "-")

def dateAdd(dateStr, seconds):
    """Return *dateStr* (a quoted '"YYYY-MM-DD hh:mm:ss"' literal) advanced
    by *seconds*, in the same quoted format.

    (Parameter renamed from ``str`` - it shadowed the builtin.)
    """
    dateStr = dateStr.replace('"', '')
    date = datetime.datetime.strptime(dateStr, "%Y-%m-%d %H:%M:%S")
    date = date + datetime.timedelta(seconds=seconds)
    return date.strftime("\"%Y-%m-%d %H:%M:%S\"")

def nullValue(v):
    """Map the sentinel string "NULL" to None for parameterized SQL inserts;
    any other value is returned unchanged.

    (Parameter renamed from ``str`` - it shadowed the builtin.)
    """
    if v == "NULL":
        return None
    return v

class Command:
    """One command parsed from the server log.

    Field values are kept as strings ready for SQL text output; the literal
    string "NULL" stands in for a value not (yet) seen in the log.
    """

    # Class-level defaults; every instance overrides these in __init__.
    processKey = None
    pid = 0
    completed = False   # Set when a completed record is found
    startTime = None
    endTime = None
    computedLapse = 0.0
    completedLapse = 0.0
    user = None
    workspace = None
    ip = None
    app = None
    name = None
    args = None
    uCpu = None
    sCpu = None
    diskIn = None
    diskOut = None
    ipcIn = None
    ipcOut = None
    maxRss = None
    pageFaults = None
    rpcMsgsIn = None
    rpcMsgsOut = None
    rpcSizeIn = None
    rpcSizeOut = None
    rpcHimarkFwd = None
    rpcHimarkRev = None
    rpcSnd = None
    rpcRcv = None
    running = 0
    lineNumber = 0
    error = None

    # Fields that may be supplied later by track/completed records; they all
    # start out as the SQL literal "NULL" (shared by __init__ and updateFrom).
    _NULLABLE_FIELDS = ("endTime", "computedLapse", "completedLapse", "uCpu",
                        "sCpu", "diskIn", "diskOut", "ipcIn", "ipcOut", "maxRss",
                        "pageFaults", "rpcMsgsIn", "rpcMsgsOut", "rpcSizeOut",
                        "rpcSizeIn", "rpcHimarkFwd", "rpcHimarkRev", "error",
                        "rpcSnd", "rpcRcv")

    def __init__(self, processKey, lineNumber, pid, startTime,
                 user, workspace, ip, app, name, args):
        self.processKey = processKey
        self.lineNumber = lineNumber
        self.pid = pid
        self.startTime = startTime
        self.user = escapeChar(user)
        self.workspace = escapeChar(workspace)
        self.ip = ip
        self.app = app
        self.name = name
        self.args = escapeChar(args) if args else "NULL"
        for field in self._NULLABLE_FIELDS:
            setattr(self, field, "NULL")
        self.running = 0

    def updateFrom(self, other):
        "Fill in any still-unset (\"NULL\") potentially-null fields from *other* - see __init__"
        for field in self._NULLABLE_FIELDS:
            if getattr(self, field) == "NULL" and getattr(other, field) != "NULL":
                setattr(self, field, getattr(other, field))

    def setUsage(self, uCpu, sCpu, diskIn, diskOut, ipcIn, ipcOut, maxRss, pageFaults):
        "Record the values from a '--- usage' track line."
        (self.uCpu, self.sCpu, self.diskIn, self.diskOut, self.ipcIn,
         self.ipcOut, self.maxRss, self.pageFaults) = (
            uCpu, sCpu, diskIn, diskOut, ipcIn, ipcOut, maxRss, pageFaults)

    def setRpc(self, rpcMsgsIn, rpcMsgsOut, rpcSizeIn,
               rpcSizeOut, rpcHimarkFwd, rpcHimarkRev, rpcSnd="NULL", rpcRcv="NULL"):
        "Record the values from a '--- rpc' track line (snd/rcv times optional)."
        (self.rpcMsgsIn, self.rpcMsgsOut, self.rpcSizeIn, self.rpcSizeOut,
         self.rpcHimarkFwd, self.rpcHimarkRev, self.rpcSnd, self.rpcRcv) = (
            rpcMsgsIn, rpcMsgsOut, rpcSizeIn, rpcSizeOut,
            rpcHimarkFwd, rpcHimarkRev, rpcSnd, rpcRcv)

class Table:
    """Per-table vtrack statistics for one command (one '--- db.<name>' block).

    Each statistic holds the literal string "NULL" until the corresponding
    track line supplies a real value.
    """

    processKey = None
    lineNumber = 0
    tableName = None
    pagesIn = None
    pagesOut = None
    pagesCached = None
    readLocks = None
    writeLocks = None
    getRows = None
    posRows = None
    scanRows = None
    putRows = None
    delRow = None
    totalReadWait = None
    totalReadHeld = None
    totalWriteWait = None
    totalWriteHeld = None
    maxReadWait = None
    maxReadHeld = None
    maxWriteWait = None
    maxWriteHeld = None

    def __init__(self, processKey, lineNumber, tableName):
        self.processKey = processKey
        self.lineNumber = lineNumber
        self.tableName = tableName
        # Every statistic starts out as the SQL literal "NULL"
        for stat in ("pagesIn", "pagesOut", "pagesCached", "readLocks",
                     "writeLocks", "getRows", "posRows", "scanRows",
                     "putRows", "delRow", "totalReadWait", "totalReadHeld",
                     "totalWriteWait", "totalWriteHeld", "maxReadWait",
                     "maxReadHeld", "maxWriteWait", "maxWriteHeld"):
            setattr(self, stat, "NULL")

    def setPages(self, pagesIn, pagesOut, pagesCached):
        "Record page in/out/cached counts."
        self.pagesIn, self.pagesOut, self.pagesCached = pagesIn, pagesOut, pagesCached

    def setLocksRows(self, readLocks, writeLocks, getRows, posRows,
                     scanRows, putRows, delRow):
        "Record read/write lock counts and row-operation counts."
        (self.readLocks, self.writeLocks, self.getRows, self.posRows,
         self.scanRows, self.putRows, self.delRow) = (
            readLocks, writeLocks, getRows, posRows, scanRows, putRows, delRow)

    def setTotalLock(self, totalReadWait, totalReadHeld, totalWriteWait, totalWriteHeld):
        "Record total lock wait/held times."
        (self.totalReadWait, self.totalReadHeld,
         self.totalWriteWait, self.totalWriteHeld) = (
            totalReadWait, totalReadHeld, totalWriteWait, totalWriteHeld)

    def setMaxLock(self, maxReadWait, maxReadHeld, maxWriteWait, maxWriteHeld):
        "Record maximum lock wait/held times."
        (self.maxReadWait, self.maxReadHeld,
         self.maxWriteWait, self.maxWriteHeld) = (
            maxReadWait, maxReadHeld, maxWriteWait, maxWriteHeld)

class Block:
    """A run of consecutive log lines; remembers the file line number at
    which the block began."""

    def __init__(self):
        self.lines = []
        self.lineNo = 0

    def addLine(self, line, lineNo):
        "Append a line; the very first one fixes the block's starting line number."
        self.lines.append(line)
        if self.lineNo == 0:
            self.lineNo = lineNo

class Log2sql:
    """Parse a Perforce server log and emit SQL INSERT statements (to an
    output stream) and/or populate a local SQLite database with the
    'process' and 'tableUse' tables described in the file header.
    """

    logname = None
    dbname = None
    logfile = None
    ckpSize = None
    readBytes = None
    mod = None
    cmds = None
    lineNo = 0

    def __init__(self, options, instream=None, outstream=None):
        """options: argparse namespace with dbname/output/sql/logfile.
        instream/outstream: optional file-like overrides used by tests."""
        self.dbname = options.dbname
        self.options = options
        self.outstream = outstream    # For testing
        if outstream is None:
            if options.output:
                if options.output == "-":
                    self.outstream = sys.stdout
                else:
                    self.outstream = open(options.output, "w")
            else:
                self.outstream = None
        if instream is None:
            # Record the log name - it was never set, so the progress
            # message used to print "Processing None"
            self.logname = options.logfile[0]
            self.logfile = open(options.logfile[0], "r")
            self.ckpSize = os.stat(options.logfile[0]).st_size
        else:
            self.logfile = instream
            self.ckpSize = 500
        if self.options.sql:
            self.conn = sqlite3.connect("%s.db" % self.dbname)
            self.conn.text_factory = str
            self.cursor = self.conn.cursor()
            # Favour speed over durability - the analysis db can be rebuilt
            self.cursor.execute("PRAGMA synchronous = OFF")
            self.cursor.execute("PRAGMA journal_mode = MEMORY")
        self.cmds = {}      # pid -> in-flight Command
        self.tables = {}    # tableName -> Table stats for the current track block
        self.readBytes = 0.0
        self.mod = 10.0     # next progress percentage to report
        self.running = 0    # count of commands currently considered running
        self.rowCount = 0
        self.db_create_database()
        self.db_create_process()
        self.db_create_tableUse()
        query = "BEGIN TRANSACTION;"
        self.output(query)
        if self.options.sql:
            self.conn.execute(query)
        self.output("Processing %s: 0%%" % self.logname, file=sys.stderr)

    def outputRequired(self, file=sys.stdout):
        "True if output() would actually write anything for this destination."
        return self.outstream or file == sys.stderr

    def output(self, text, file=sys.stdout):
        "Write progress text to stderr, or SQL text to the output stream if open."
        if (file == sys.stderr):
            print(text, file=file)
        elif self.outstream:
            self.outstream.write("%s\n" % text)

    def terminate(self):
        "Flush any still-open commands and commit/close the database."
        self.flush_output()
        self.db_commit(True)
        if self.options.sql:
            self.conn.commit()
            self.conn.close()

    def readLogfile(self):
        "Read one line, maintaining the line count and progress reporting."
        line = self.logfile.readline()
        if line:
            self.lineNo += 1
            # TODO
            # if line.respond_to?(:encode)
            #    line = line.encode("utf-8", 'binary', :invalid => :replace, :undef => :replace)
            #line = line.decode('utf-8', errors='ignore')
            self.readBytes += len(line)
            # Report progress roughly every 10% of the file size
            if math.floor(((self.readBytes / self.ckpSize) * 100.0) / self.mod) == 1.0:
                self.output("...%d%%" % self.mod, file=sys.stderr)
                # $stderr.flush
                self.mod += 10
        return line

    def getLineNumber(self):
        "Current (1-based) line number in the log file."
        return self.lineNo

    def db_commit_updates(self):
        "Commit and restart the transaction every RowsPerTransaction rows."
        self.rowCount += 1
        if self.rowCount % RowsPerTransaction == 0:
            query = "COMMIT;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.commit()
            query = "BEGIN TRANSACTION;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.execute(query)

    def db_commit(self, state):
        "If state is true, commit and begin a new transaction; else emit autocommit off."
        if (state):
            query = "COMMIT;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.commit()
            query = "BEGIN TRANSACTION;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.execute(query)
        else:
            query = "SET autocommit=0;"
            if self.outputRequired():
                self.output(query)

    def db_create_database(self):
        "Emit CREATE DATABASE/USE statements (SQL text output only - not SQLite)."
        query = "CREATE DATABASE IF NOT EXISTS " + self.dbname + ";"
        if self.outputRequired():
            self.output(query)
        query = "USE " + self.dbname + ";"
        if self.outputRequired():
            self.output(query)

    def db_create_process(self):
        "Emit/execute DDL for the process table."
        query = "DROP TABLE IF EXISTS process;"
        if self.outputRequired():
            self.output(query)
        query = "CREATE TABLE process (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, pid INT NOT NULL, " \
                "startTime DATETIME NOT NULL,endTime DATETIME NULL, computedLapse FLOAT NULL,completedLapse FLOAT NULL, " \
                "user TEXT NOT NULL, workspace TEXT NOT NULL, ip TEXT NOT NULL, app TEXT NOT NULL, cmd TEXT NOT NULL, " \
                "args TEXT NULL, uCpu INT NULL, sCpu INT NULL, diskIn INT NULL, diskOut INT NULL, ipcIn INT NULL, " \
                "ipcOut INT NULL, maxRss INT NULL, pageFaults INT NULL, rpcMsgsIn INT NULL, rpcMsgsOut INT NULL, " \
                "rpcSizeIn INT NULL, rpcSizeOut INT NULL, rpcHimarkFwd INT NULL, rpcHimarkRev INT NULL, " \
                "rpcSnd FLOAT NULL, rpcRcv FLOAT NULL, running INT NULL, " \
                "error TEXT NULL, PRIMARY KEY (processkey, lineNumber));"
        if self.outputRequired():
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute(query)
            except sqlite3.OperationalError:
                # Table probably already exists (the DROP above is only written
                # to the SQL text output, not executed) - keep existing data.
                # Was a bare except; narrowed to the expected error.
                pass

    def db_create_tableUse(self):
        "Emit/execute DDL for the tableUse table."
        query = "DROP TABLE IF EXISTS tableUse;"
        if self.outputRequired():
            self.output(query)
        if self.options.sql:
            self.cursor.execute(query)
        query = "CREATE TABLE tableUse (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, " \
                "tableName VARCHAR(255) NOT NULL, pagesIn INT NULL, pagesOut INT NULL, pagesCached INT NULL, " \
                "readLocks INT NULL, writeLocks INT NULL, getRows INT NULL, posRows INT NULL, scanRows INT NULL, " \
                "putRows int NULL, delRows INT NULL, totalReadWait INT NULL, totalReadHeld INT NULL, " \
                "totalWriteWait INT NULL, totalWriteHeld INT NULL, maxReadWait INT NULL, maxReadHeld INT NULL, " \
                "maxWriteWait INT NULL, maxWriteHeld INT NULL, PRIMARY KEY (processkey, lineNumber, tableName));"
        if self.outputRequired():
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute(query)
            except sqlite3.OperationalError:
                # Table may already exist - see db_create_process. Was a bare except.
                pass

    def addCommand(self, cmd, vtrack):
        "Register a (possibly updated) command; flush superseded ones to the db."
        cmd.running = self.running
        # If we are re-using pid then process previous command
        # Or process 'rmt-FileFetch' because they don't have 'completed' records
        if cmd.pid in self.cmds:
            if cmd.processKey != self.cmds[cmd.pid].processKey:
                self.sql_process_insert(self.cmds[cmd.pid])
                self.cmds[cmd.pid] = cmd    # replace
            elif cmd.name == 'rmt-FileFetch' and not vtrack:
                self.sql_process_insert(self.cmds[cmd.pid])
                self.cmds[cmd.pid] = cmd    # replace
            else:
                self.cmds[cmd.pid].updateFrom(cmd)
        else:
            self.cmds[cmd.pid] = cmd
            self.running += 1
            self.processCompletedCommands()

    def processCompletedCommands(self):
        """Remove any commands with completed state - should only be called by addCommand when
        we are adding a new command - at that point we know there are no new records to come for any previous
        ones as even track output will have been processed"""
        # Iterate over a snapshot of the keys: we delete entries as we go, and
        # mutating a dict while iterating it raises RuntimeError on Python 3.
        for pid in list(self.cmds.keys()):
            if self.cmds[pid].completed:
                self.sql_process_insert(self.cmds[pid])
                del self.cmds[pid]
                self.running -= 1

    def updateComputedTime(self, pid, computedLapse):
        "Accumulate compute-phase time - a command may log several 'compute end' records."
        if pid in self.cmds:
            # sum all the compute values of a same command
            # (local renamed from 'sum' - it shadowed the builtin)
            if self.cmds[pid].computedLapse == "NULL":
                total = 0.0
            else:
                total = float(self.cmds[pid].computedLapse)
            total += float(computedLapse)
            self.cmds[pid].computedLapse = str(total)

    def updateCompletionTime(self, pid, endTime, completedLapse):
        "Mark the command completed and record its end time and elapsed time."
        if pid in self.cmds:
            self.cmds[pid].completed = True
            self.cmds[pid].endTime = endTime
            self.cmds[pid].completedLapse = str(float(completedLapse))

    def processTrackRecords(self, cmd, lines):
        "Parse the '--- ...' vtrack lines for cmd: usage, rpc and per-table stats."
        self.tables = {}
        tableName = None
        # (removed a batch of local counters that were initialized but never used)
        # Use line number from original cmd if appropriate
        if cmd.pid in self.cmds and cmd.processKey == self.cmds[cmd.pid].processKey:
            cmd.lineNumber = self.cmds[cmd.pid].lineNumber
        for line in lines:
            if not RE_TRACK.match(line):
                break
            gotMatch = False
            match = RE_TRACK_LAPSE.match(line)
            if match:
                gotMatch = True
                cmd.completedLapse = match.group(1)
            if not gotMatch:
                match = RE_FAILED_AUTH.match(line)
                if match:
                    gotMatch = True   # was missing - caused redundant matching below
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_KILLED_BY.match(line)
                if match:
                    gotMatch = True
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_EXITED.match(line)
                if match:
                    gotMatch = True
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_TRACK_USAGE.match(line)
                if match:
                    gotMatch = True
                    cmd.setUsage(match.group(1), match.group(2), match.group(3), match.group(4),
                                 match.group(5), match.group(6), match.group(7), match.group(8))
            if not gotMatch:
                # Try the longer rpc form (with snd/rcv times) first
                match = RE_TRACK_RPC2.match(line)
                if match:
                    gotMatch = True
                    cmd.setRpc(match.group(1), match.group(2), match.group(3),
                               match.group(4), match.group(5), match.group(6),
                               match.group(7), match.group(8))
            if not gotMatch:
                match = RE_TRACK_RPC.match(line)
                if match:
                    gotMatch = True
                    cmd.setRpc(match.group(1), match.group(2), match.group(3),
                               match.group(4), match.group(5), match.group(6))
            if not gotMatch:
                match = RE_TRACK_TABLE.match(line)
                if match:
                    gotMatch = True
                    tableName = match.group(1)
                    self.tables[tableName] = Table(cmd.processKey,
                                                   cmd.lineNumber, tableName)
            if not gotMatch:
                match = RE_TRACK_PAGES.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setPages(match.group(1), match.group(2), match.group(3))
            if not gotMatch:
                match = RE_TRACK_LOCKS_ROWS.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setLocksRows(match.group(1), match.group(2),
                                                            match.group(3), match.group(4),
                                                            match.group(5), match.group(6),
                                                            match.group(7))
            if not gotMatch:
                match = RE_TRACK_TOTAL_LOCK.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setTotalLock(match.group(1), match.group(2),
                                                            match.group(3), match.group(4))
                        # The max is only reported if there is more than one
                        # lock taken on the table
                        if (self.tables[tableName].readLocks == "NULL" or
                            int(self.tables[tableName].readLocks) < 2) and \
                            (self.tables[tableName].writeLocks == "NULL" or
                            int(self.tables[tableName].writeLocks) < 2):
                            self.tables[tableName].setMaxLock(match.group(1), match.group(2),
                                                              match.group(3), match.group(4))
            if not gotMatch:
                match = RE_TRACK_MAX_LOCK.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setMaxLock(match.group(1), match.group(2),
                                                          match.group(3), match.group(4))
        if cmd.completedLapse != "NULL":
            # int(float(...)) copes with fractional lapse values such as "1.5"
            cmd.endTime = dateAdd(cmd.startTime, int(float(cmd.completedLapse)))
        else:
            cmd.endTime = cmd.startTime
        self.addCommand(cmd, True)
        try:
            if python3:
                for k, v in self.tables.items():
                    self.sql_tableUse_insert(v)
            else:
                for table in self.tables.values():
                    self.sql_tableUse_insert(table)
        except Exception as e:
            print("%s, lineNo %d, cmd lineNo %d, %s, %s, %s" % (str(e), self.lineNo, cmd.lineNumber, cmd.processKey, cmd.name, cmd.args))

    def flush_output(self):
        "Write out every command still pending (no 'completed' record seen)."
        if python3:
            for p, v in self.cmds.items():
                self.sql_process_insert(v)
        else:
            for p, v in self.cmds.iteritems():
                self.sql_process_insert(v)
        self.output("", file=sys.stderr)

    def sql_process_insert(self, cmd):
        "Insert one command into the process table (SQL text and/or SQLite)."
        if self.outputRequired():
            query = 'INSERT IGNORE INTO process VALUES ("%s",%d,%d,%s,%s,%s,%s,"%s","%s","%s","%s","%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%d,%s);' % \
                    (cmd.processKey, cmd.lineNumber, cmd.pid,
                     cmd.startTime, cmd.endTime, cmd.computedLapse,
                     cmd.completedLapse, cmd.user, cmd.workspace,
                     cmd.ip, cmd.app, cmd.name, cmd.args, cmd.rpcMsgsIn,
                     cmd.rpcMsgsOut, cmd.rpcSizeIn, cmd.rpcSizeOut,
                     cmd.rpcHimarkFwd, cmd.rpcHimarkRev, cmd.rpcSnd, cmd.rpcRcv, cmd.uCpu,
                     cmd.sCpu, cmd.diskIn, cmd.diskOut, cmd.ipcIn, cmd.ipcOut,
                     cmd.maxRss, cmd.pageFaults, cmd.running, cmd.error)
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute('INSERT INTO process VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
                                    (cmd.processKey, cmd.lineNumber, cmd.pid,
                                     cmd.startTime, nullValue(cmd.endTime), nullValue(cmd.computedLapse),
                                     nullValue(cmd.completedLapse), nullValue(cmd.user), nullValue(cmd.workspace),
                                     nullValue(cmd.ip), nullValue(cmd.app), nullValue(cmd.name), nullValue(cmd.args),
                                     nullValue(cmd.rpcMsgsIn),
                                     nullValue(cmd.rpcMsgsOut), nullValue(cmd.rpcSizeIn), nullValue(cmd.rpcSizeOut),
                                     nullValue(cmd.rpcHimarkFwd), nullValue(cmd.rpcHimarkRev),
                                     nullValue(cmd.rpcSnd), nullValue(cmd.rpcRcv), nullValue(cmd.uCpu),
                                     nullValue(cmd.sCpu), nullValue(cmd.diskIn), nullValue(cmd.diskOut),
                                     nullValue(cmd.ipcIn), nullValue(cmd.ipcOut),
                                     nullValue(cmd.maxRss), nullValue(cmd.pageFaults), nullValue(cmd.running),
                                     nullValue(cmd.error)))
            except Exception as e:
                raise Exception("%s: %s, %d, %d, %s" % (str(e), cmd.processKey, cmd.lineNumber, cmd.pid, cmd.name))
        self.db_commit_updates()

    def sql_tableUse_insert(self, tab):
        "Insert one table-usage record into tableUse (SQL text and/or SQLite)."
        if self.outputRequired():
            query = 'INSERT IGNORE INTO tableUse VALUES ("%s",%d,"%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \
                    '%s,%s,%s,%s);' % \
                    (tab.processKey, tab.lineNumber, tab.tableName, tab.pagesIn,
                     tab.pagesOut, tab.pagesCached, tab.readLocks,
                     tab.writeLocks, tab.getRows, tab.posRows, tab.scanRows,
                     tab.putRows, tab.delRow,
                     tab.totalReadWait, tab.totalReadHeld,
                     tab.totalWriteWait, tab.totalWriteHeld,
                     tab.maxReadWait, tab.maxReadHeld,
                     tab.maxWriteWait, tab.maxWriteHeld)
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute('INSERT INTO tableUse VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
                    (tab.processKey, tab.lineNumber, tab.tableName,
                     nullValue(tab.pagesIn), nullValue(tab.pagesOut), nullValue(tab.pagesCached),
                     nullValue(tab.readLocks), nullValue(tab.writeLocks), nullValue(tab.getRows), nullValue(tab.posRows),
                     nullValue(tab.scanRows), nullValue(tab.putRows), nullValue(tab.delRow),
                     nullValue(tab.totalReadWait), nullValue(tab.totalReadHeld),
                     nullValue(tab.totalWriteWait), nullValue(tab.totalWriteHeld),
                     nullValue(tab.maxReadWait), nullValue(tab.maxReadHeld),
                     nullValue(tab.maxWriteWait), nullValue(tab.maxWriteHeld)))
            except Exception as e:
                raise Exception("%s: %s, %d, %s" % (str(e), tab.processKey, tab.lineNumber, tab.tableName))
        self.db_commit_updates()

    def processInfoBlock(self, block):
        "Process one 'Perforce server info:' block: command, completed, compute, track lines."
        cmd = None
        i = 0
        # First line of block is info line - process the rest
        for line in block.lines[1:]:
            i += 1
            # Check for track lines and once we have found one, process them all and finish
            if cmd:
                match = RE_TRACK.match(line)
                if match:
                    self.processTrackRecords(cmd, block.lines[i:])
                    break   # Block has been processed

            match = RE_CMD_NOARG.match(line)
            if not match:
                match = RE_CMD.match(line)
            if match:
                pid = int(match.group(2))
                startTime = parseDateTime(match.group(1))
                user = match.group(3)
                workspace = match.group(4)
                ip = match.group(5)
                # following gsub required due to a 2009.2 P4V bug
                app = match.group(6).replace("\x00", "/")
                name = match.group(7)
                cmdArgs = None
                if len(match.groups()) == 8:
                    cmdArgs = match.group(8)
                    # Strip Swarm/Git Fusion commands with lots of json
                    smatch = re.search(RE_JSON_CMDARGS, cmdArgs)
                    if smatch:
                        cmdArgs = smatch.group(1)
                # The processKey identifies repeat invocations of a command
                m = hashlib.md5()
                line = re.sub(r'[^\x00-\x7F]', '?', line)   # Do this so we can compare results with ruby version
                if python3:
                    m.update(line.encode())
                else:
                    m.update(line)
                processKey = m.hexdigest()
                cmd = Command(processKey, block.lineNo, pid, startTime,
                              user, workspace, ip, app, name, cmdArgs)
                self.addCommand(cmd, False)
                continue

            # Pattern matching a completed line
            match = RE_COMPLETED.match(line)
            if match:
                pid = int(match.group(2))
                endTime = parseDateTime(match.group(1))
                completedLapse = match.group(3)
                self.updateCompletionTime(pid, endTime, completedLapse)
                continue
            # Pattern matching a compute line
            match = RE_COMPUTE.match(line)
            if match:
                pid = int(match.group(2))
                computedLapse = float(match.group(3))
                self.updateComputedTime(pid, computedLapse)
                continue

    def blockEnd(self, line):
        "Blank line or one of terminators"
        # Fixed: a missing comma silently concatenated the middle two entries
        # into one never-matching string.
        terminators = ["Perforce server info:",
                       "Perforce server error:",
                       "locks acquired by blocking after",
                       "Rpc himark:"]
        if line == "" or line == "\n" or line == "\r\n":
            return True
        for t in terminators:
            if line.startswith(t):
                return True
        return False

    def blockInfo(self, line):
        "True if line starts a 'Perforce server info:' block."
        t = "Perforce server info:"
        if line[:len(t)] == t:
            return True
        return False

    def processLog(self):
        "Main loop: group log lines into blocks and process each info block."
        block = Block()
        line = self.readLogfile()
        while line:
            if self.blockEnd(line):
                if block.lines:
                    if self.blockInfo(block.lines[0]):
                        self.processInfoBlock(block)
                    # TODO: process errors etc
                    block = Block()
                # A terminator line starts the next block
                block.addLine(line, self.lineNo)
            else:
                block.addLine(line, self.lineNo)
            line = self.readLogfile()
        if block.lines:     # Handle a final block without trailing terminator
            if self.blockInfo(block.lines[0]):
                self.processInfoBlock(block)
        self.terminate()

######################
# START OF MAIN SCRIPT
######################

# Pre-compiled patterns for the p4d log format. All raw strings (the old
# plain strings relied on '\d' etc. surviving as literal backslashes, which
# raises invalid-escape warnings on modern Python). Also fixed: the second
# alternative of RE_TRACK_MAX_LOCK had an unescaped '+' in 'wait+held'
# (making it unmatchable), and RE_TRACK_TABLE had an unescaped '.' in 'db.'.
RE_INFO = re.compile(r'^Perforce server info:')
RE_EMPTY = re.compile(r'^\s*$')
RE_JSON_CMDARGS = re.compile(r'^(.*) \{.*\}$')
RE_CMD = re.compile(r'^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+) (.*)\'.*')
RE_CMD_NOARG = re.compile(r'^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+)\'.*')
RE_COMPUTE = re.compile(r'^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) compute end ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*')
RE_COMPLETED = re.compile(r'^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) completed ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*')
RE_TRACK = re.compile(r'^---|^locks acquired by blocking after 3 non-blocking attempts')
RE_TRACK_LAPSE = re.compile(r'^--- lapse (\d+)')
RE_TRACK_RPC = re.compile(r'^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+)')
RE_TRACK_RPC2 = re.compile(r'^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+) snd/rcv ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s/([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s')
RE_TRACK_USAGE = re.compile(r'^--- usage (\d+)\+(\d+)us (\d+)\+(\d+)io (\d+)\+(\d+)net (\d+)k (\d+)pf')
RE_FAILED_AUTH = re.compile(r'^--- (failed authentication check)')
RE_KILLED_BY = re.compile(r'^--- (killed by .*)')
RE_EXITED = re.compile(r'^--- (exited on fatal server error)')
RE_TRACK_TABLE = re.compile(r'^--- db\.([a-zA-Z]*)')
RE_TRACK_PAGES = re.compile(r'^---   pages in\+out\+cached (\d+)\+(\d+)\+(\d+)')
RE_TRACK_LOCKS_ROWS = re.compile(
    r'^---   locks read/write (\d+)/(\d+) rows get\+pos\+scan put\+del (\d+)\+(\d+)\+(\d+) (\d+)\+(\d+)')
RE_TRACK_TOTAL_LOCK = re.compile(r'^---   total lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')
RE_TRACK_MAX_LOCK = re.compile(
    r'^---   max lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms|---   locks wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')


def main():
    """Entry point: parse command-line options and run the log analyzer.

    Requires at least one of -o/--output (SQL text file) or -s/--sql
    (local SQLite database); exits with status 1 if neither is given.
    """
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('logfile', nargs=1, help='log file to process')
    parser.add_argument('-d', '--dbname', help="Database name to use", default=None)
    parser.add_argument('-o', '--output', help="Name of file to print SQL to (if not specified then no output)", default=None)
    parser.add_argument('-s', '--sql', help="Use local SQL Lite database", action='store_true', default=False)
    # Let argparse handle its own errors and exits. The previous bare
    # "except:" here caught SystemExit, so -h/--help (a successful exit)
    # printed the help text twice and exited with status 1, and it also
    # swallowed KeyboardInterrupt.
    options = parser.parse_args()
    if not options.output and not options.sql:
        print("Please specify either an output file or -s/--sql")
        parser.print_help()
        sys.exit(1)

    log2sql = Log2sql(options)
    log2sql.processLog()

if __name__ == '__main__':
    main()
# Change User Description Committed
#5 23765 Robert Cowham Moved things down one level to psla dir to make it easier to see what belongs
#4 23719 Robert Cowham Log warnings for insert exceptions
#3 23683 Robert Cowham Bring in latest changes - performance improvements
#2 23636 Robert Cowham Merge in latests changes to log2sql.py
# - much faster
# - better at detecting completed commands and out of order vtrack
# - full test harness
#1 22465 Robert Cowham Added tests for r99.2 created depot (no filesize/digest stored)
# Fixed problem with preview failing comparison
# Add --p4d option to tests to use different p4d version
# //guest/robert_cowham/perforce/utils/log_analysis/log2sql.py
#22 21796 Robert Cowham Handle rmt-FileFetch records which may be duplicates as they don't have 'completed' entries.
#21 21780 Robert Cowham Don't drop tables when creating sqlite db
Performance tweaks after profiling - particularly date parsing.
#20 21713 Robert Cowham Ignore temporary error inserting into trackuse table
#19 21712 Robert Cowham Check for correct line number when looking to update for track entries
#18 21708 Robert Cowham Tweak parameters to improve error messages.
Make output to stdout optional
#17 21705 Robert Cowham Minor refactoring for clarity.
Process all completed records as we go.
#16 21702 Robert Cowham Rework algorithm to process blocks at a time, and to avoid the SQL update statements.
Risk of increased memory vs improved speed.
#15 21697 Robert Cowham Parse rcpSnd/rpcRcv and add to database.
#14 21694 Robert Cowham Handle null values properly
#13 21672 Robert Cowham Improve performance when inserting rows
#12 21660 Robert Cowham Handle utf8 in python2 strings
#11 21656 Robert Cowham Avoid SQL quoting issues with alternative insert statement.
#10 21643 Robert Cowham Handle git-fusion entries (strip the json) - for --sql option
#9 21402 Robert Cowham Filter out swarm commands
Test for db content in a basic fashion.
#8 21393 Robert Cowham Add a swarm test - large record.
Improve performance by skipping easy matches early.
#7 21378 Robert Cowham Batch updates and make SQL date arithmetic work with SQLite
#6 21374 Robert Cowham Write to sqllite db
#5 21368 Robert Cowham Make tests work under python3
#4 21367 Robert Cowham Restructured for unit tests, and first unit tests.
#3 21366 Robert Cowham Fix problems with NULL values and int
#2 21365 Robert Cowham Equivalent of Ruby version on smaller sets
#1 21361 Robert Cowham Sort of works...