#!/usr/bin/env python3
# -*- encoding: UTF8 -*-
##############################################################################
#
# Copyright (c) 2008,2016 Perforce Software, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# = Description
#
# Output SQL queries generated by parsing commands logged in a Perforce
# server log file.
# The output can be imported directly in any SQL database.
#
# log2sql populates the following tables:
#
# +----------------+----------+------+-----+
# | process |
# +----------------+----------+------+-----+
# | Field | Type | Null | Key |
# +----------------+----------+------+-----+
# | processKey | char(32) | NO | PRI |
# | lineNumber | int | NO | PRI |
# | startTime | date | NO | |
# | endTime | date | YES | |
# | computedLapse | float | YES | |
# | completedLapse | float | YES | |
# | pid | int | NO | |
# | user | text | NO | |
# | workspace | text | NO | |
# | ip | text | NO | |
# | app | text | NO | |
# | cmd | text | NO | |
# | args | text | YES | |
# | uCpu | int | YES | |
# | sCpu | int | YES | |
# | diskIn | int | YES | |
# | diskOut | int | YES | |
# | ipcIn | int | YES | |
# | ipcOut | int | YES | |
# | maxRss | int | YES | |
# | pageFaults | int | YES | |
# | rpcMsgsIn | int | YES | |
# | rpcMsgsOut | int | YES | |
# | rpcSizeIn | int | YES | |
# | rpcSizeOut | int | YES | |
# | rpcHimarkFwd | int | YES | |
# | rpcHimarkRev | int | YES | |
# | rpcSnd | float | YES | |
# | rpcRcv | float | YES | |
# | running | int | NO | |
# | error | text | YES | |
# +----------------+----------+------+-----+
#
# +----------------+--------------+------+-----+
# | tableUse |
# +----------------+--------------+------+-----+
# | Field | Type | Null | Key |
# +----------------+--------------+------+-----+
# | processKey | char(32) | NO | PRI |
# | lineNumber | int | NO | PRI |
# | tableName | varchar(255) | NO | PRI |
# | pagesIn | int | YES | |
# | pagesOut | int | YES | |
# | pagesCached | int | YES | |
# | readLocks | int | YES | |
# | writeLocks | int | YES | |
# | getRows | int | YES | |
# | posRows | int | YES | |
# | scanRows | int | YES | |
# | putRows | int | YES | |
# | delRows | int | YES | |
# | totalReadWait | int | YES | |
# | totalReadHeld | int | YES | |
# | totalWriteWait | int | YES | |
# | totalWriteHeld | int | YES | |
# | maxReadWait | int | YES | |
# | maxReadHeld | int | YES | |
# | maxWriteWait | int | YES | |
# | maxWriteHeld | int | YES | |
# +----------------+--------------+------+-----+
#
# = Usage
#
# log2sql.py -d <database_name> log_file
#
# = Requirements
#
# Python 2.7+ or 3.x: https://www.python.org (this is the Python port of the original Ruby log2sql.rb)
#
##############################################################################
from __future__ import print_function
import re
import sys
import os
import math
import argparse
import datetime
import hashlib
import sqlite3
# True when running under Python 3; used below to choose between dict
# iteration (items vs iteritems) and str-encoding variants.
python3 = sys.version_info[0] >= 3
RowsPerTransaction = 1000 # Commit every so many rows if in SQL mode
def escapeChar(str):
    """Backslash-escape backslashes and double quotes for SQL text output."""
    return str.replace("\\", "\\\\").replace("\"", "\\\"")
def parseDateTime(str):
    """Convert a log timestamp (YYYY/MM/DD HH:MM:SS) to a quoted SQL date.

    A plain character replace is much faster than strptime and is all
    that is needed here.
    """
    return '"{}"'.format(str.replace("/", "-"))
def dateAdd(str, seconds):
    """Return the quoted date string advanced by the given number of seconds."""
    stamp = datetime.datetime.strptime(str.replace('"', ''), "%Y-%m-%d %H:%M:%S")
    shifted = stamp + datetime.timedelta(seconds=seconds)
    return shifted.strftime("\"%Y-%m-%d %H:%M:%S\"")
def nullValue(str):
    """Map the textual marker "NULL" to a real None for parameterised inserts."""
    return None if str == "NULL" else str
class Command:
    """One command parsed from the server log.

    Field values are kept as strings ready for direct SQL insertion;
    optional fields hold the literal string "NULL" until real data is
    seen for them (see updateFrom/nullValue).
    """
    # Class-level defaults - every one is overwritten in __init__.
    processKey = None
    pid = 0
    completed = False # Set when a completed record is found
    startTime = None
    endTime = None
    computedLapse = 0.0
    completedLapse = 0.0
    user = None
    workspace = None
    ip = None
    app = None
    name = None
    args = None
    uCpu = None
    sCpu = None
    diskIn = None
    diskOut = None
    ipcIn = None
    ipcOut = None
    maxRss = None
    pageFaults = None
    rpcMsgsIn = None
    rpcMsgsOut = None
    rpcSizeIn = None
    rpcSizeOut = None
    rpcHimarkFwd = None
    rpcHimarkRev = None
    rpcSnd = None
    rpcRcv = None
    running = 0
    lineNumber = 0
    error = None

    def __init__(self, processKey, lineNumber, pid, startTime,
                 user, workspace, ip, app, name, args):
        """Record the fields of a command start line; all optional
        (track/usage/rpc) fields start out as the marker string "NULL"."""
        self.processKey = processKey
        self.lineNumber = lineNumber
        self.pid = pid
        self.startTime = startTime
        self.user = escapeChar(user)
        self.workspace = escapeChar(workspace)
        self.ip = ip
        self.app = app
        self.name = name
        if args:
            self.args = escapeChar(args)
        else:
            self.args = "NULL"
        self.endTime = self.computedLapse = self.completedLapse = self.uCpu = "NULL"
        self.sCpu = self.diskIn = self.diskOut = self.ipcIn = self.ipcOut = self.maxRss = "NULL"
        self.pageFaults = self.rpcMsgsIn = self.rpcMsgsOut = self.rpcSizeOut = "NULL"
        self.rpcSizeIn = self.rpcHimarkFwd = self.rpcHimarkRev = self.error = "NULL"
        self.rpcSnd = self.rpcRcv = "NULL"
        self.running = 0

    def updateFrom(self, other):
        "Update any potentially null fields - see __init__"
        # Only copy a field from `other` if we don't already have a value.
        for f in ["endTime", "computedLapse", "completedLapse", "uCpu",
                  "sCpu", "diskIn", "diskOut", "ipcIn", "ipcOut", "maxRss",
                  "pageFaults", "rpcMsgsIn", "rpcMsgsOut", "rpcSizeOut",
                  "rpcSizeIn", "rpcHimarkFwd", "rpcHimarkRev", "error",
                  "rpcSnd", "rpcRcv"]:
            if getattr(other, f) != "NULL" and getattr(self, f) == "NULL":
                setattr(self, f, getattr(other, f))

    def setUsage(self, uCpu, sCpu, diskIn, diskOut, ipcIn, ipcOut, maxRss, pageFaults):
        "Store the fields of a '--- usage' track line."
        self.uCpu = uCpu
        self.sCpu = sCpu
        self.diskIn = diskIn
        self.diskOut = diskOut
        self.ipcIn = ipcIn
        self.ipcOut = ipcOut
        self.maxRss = maxRss
        self.pageFaults = pageFaults

    def setRpc(self, rpcMsgsIn, rpcMsgsOut, rpcSizeIn,
               rpcSizeOut, rpcHimarkFwd, rpcHimarkRev, rpcSnd="NULL", rpcRcv="NULL"):
        "Store the fields of a '--- rpc' track line; snd/rcv are optional."
        self.rpcMsgsIn = rpcMsgsIn
        self.rpcMsgsOut = rpcMsgsOut
        self.rpcSizeIn = rpcSizeIn
        self.rpcSizeOut = rpcSizeOut
        self.rpcHimarkFwd = rpcHimarkFwd
        self.rpcHimarkRev = rpcHimarkRev
        self.rpcSnd = rpcSnd
        self.rpcRcv = rpcRcv
class Table:
    """Per-database-table performance figures reported for one command.

    All statistics start out as the marker string "NULL" and are filled
    in as the corresponding '--- ...' track lines are parsed.
    """
    # Class-level defaults - overwritten per instance in __init__.
    processKey = None
    lineNumber = 0
    tableName = None
    pagesIn = None
    pagesOut = None
    pagesCached = None
    readLocks = None
    writeLocks = None
    getRows = None
    posRows = None
    scanRows = None
    putRows = None
    delRow = None
    totalReadWait = None
    totalReadHeld = None
    totalWriteWait = None
    totalWriteHeld = None
    maxReadWait = None
    maxReadHeld = None
    maxWriteWait = None
    maxWriteHeld = None

    def __init__(self, processKey, lineNumber, tableName):
        """Identify the table entry; every statistic begins as "NULL"."""
        self.processKey = processKey
        self.lineNumber = lineNumber
        self.tableName = tableName
        for stat in ("pagesIn", "pagesOut", "pagesCached", "readLocks",
                     "writeLocks", "getRows", "posRows", "scanRows",
                     "putRows", "delRow", "totalReadWait", "totalReadHeld",
                     "totalWriteWait", "totalWriteHeld", "maxReadWait",
                     "maxReadHeld", "maxWriteWait", "maxWriteHeld"):
            setattr(self, stat, "NULL")

    def setPages(self, pagesIn, pagesOut, pagesCached):
        "Store the fields of a '--- pages' track line."
        self.pagesIn = pagesIn
        self.pagesOut = pagesOut
        self.pagesCached = pagesCached

    def setLocksRows(self, readLocks, writeLocks, getRows, posRows,
                     scanRows, putRows, delRow):
        "Store the fields of a '--- locks read/write ... rows' track line."
        self.readLocks = readLocks
        self.writeLocks = writeLocks
        self.getRows = getRows
        self.posRows = posRows
        self.scanRows = scanRows
        self.putRows = putRows
        self.delRow = delRow

    def setTotalLock(self, totalReadWait, totalReadHeld, totalWriteWait, totalWriteHeld):
        "Store the fields of a '--- total lock wait+held' track line."
        self.totalReadWait = totalReadWait
        self.totalReadHeld = totalReadHeld
        self.totalWriteWait = totalWriteWait
        self.totalWriteHeld = totalWriteHeld

    def setMaxLock(self, maxReadWait, maxReadHeld, maxWriteWait, maxWriteHeld):
        "Store the fields of a '--- max lock wait+held' track line."
        self.maxReadWait = maxReadWait
        self.maxReadHeld = maxReadHeld
        self.maxWriteWait = maxWriteWait
        self.maxWriteHeld = maxWriteHeld
class Block:
    """A run of consecutive log lines making up one logical log entry."""

    def __init__(self):
        self.lines = []
        self.lineNo = 0  # line number of the first line in the block

    def addLine(self, line, lineNo):
        "Append a line; the first line added fixes the block's line number."
        if not self.lineNo:
            self.lineNo = lineNo
        self.lines.append(line)
class Log2sql:
    """Parse a Perforce server log and emit SQL for the process/tableUse tables.

    Commands are accumulated per pid in self.cmds until they are known to be
    complete, then written out.  Output goes to an SQL text stream
    (options.output), a local SQLite database (options.sql), or both.
    """
    # Class-level defaults - set per instance in __init__.
    logname = None
    dbname = None
    logfile = None
    ckpSize = None
    readBytes = None
    mod = None
    cmds = None
    lineNo = 0

    def __init__(self, options, instream=None, outstream=None):
        """Open input/output streams and (re)create the output tables.

        instream/outstream override the files named in options - used
        for testing.
        """
        self.dbname = options.dbname
        self.options = options
        self.outstream = outstream # For testing
        if outstream is None:
            if options.output:
                if options.output == "-":
                    self.outstream = sys.stdout
                else:
                    self.outstream = open(options.output, "w")
            else:
                self.outstream = None
        if instream is None:
            # Remember the name for progress messages (was previously never set)
            self.logname = options.logfile[0]
            self.logfile = open(options.logfile[0], "r")
            self.ckpSize = os.stat(options.logfile[0]).st_size
        else:
            self.logfile = instream
            self.ckpSize = 500
        if self.options.sql:
            self.conn = sqlite3.connect("%s.db" % self.dbname)
            self.conn.text_factory = str
            self.cursor = self.conn.cursor()
            # Trade durability for speed - this is a bulk-load tool.
            self.cursor.execute("PRAGMA synchronous = OFF")
            self.cursor.execute("PRAGMA journal_mode = MEMORY")
        self.cmds = {}       # pid -> in-flight Command
        self.tables = {}     # tableName -> Table for the current track block
        self.readBytes = 0.0
        self.mod = 10.0      # next progress percentage to report
        self.running = 0     # concurrent commands at the current log position
        self.rowCount = 0    # rows inserted since last commit
        self.db_create_database()
        self.db_create_process()
        self.db_create_tableUse()
        query = "BEGIN TRANSACTION;"
        self.output(query)
        if self.options.sql:
            self.conn.execute(query)
        self.output("Processing %s: 0%%" % self.logname, file=sys.stderr)

    def outputRequired(self, file=sys.stdout):
        "True when output() would actually write anything for this target."
        return self.outstream or file == sys.stderr

    def output(self, text, file=sys.stdout):
        "Write text to the SQL output stream, or to stderr for progress messages."
        if (file == sys.stderr):
            print(text, file=file)
        elif self.outstream:
            self.outstream.write("%s\n" % text)

    def terminate(self):
        "Flush all still-pending commands and commit/close the database."
        self.flush_output()
        self.db_commit(True)
        if self.options.sql:
            self.conn.commit()
            self.conn.close()

    def readLogfile(self):
        """Read one line, tracking byte count for the progress display.

        Returns the line, or '' at end of file.
        """
        line = self.logfile.readline()
        if line:
            self.lineNo += 1
            self.readBytes += len(line)
            # Report progress roughly every 10% of the file size.
            if math.floor(((self.readBytes / self.ckpSize) * 100.0) / self.mod) == 1.0:
                self.output("...%d%%" % self.mod, file=sys.stderr)
                self.mod += 10
        return line

    def getLineNumber(self):
        "Current (1-based) line number in the log file."
        return self.lineNo

    def db_commit_updates(self):
        "Count a row; commit and restart the transaction every RowsPerTransaction rows."
        self.rowCount += 1
        if self.rowCount % RowsPerTransaction == 0:
            query = "COMMIT;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.commit()
            query = "BEGIN TRANSACTION;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.execute(query)

    def db_commit(self, state):
        "Commit and restart the transaction (state True), or turn autocommit off."
        if (state):
            query = "COMMIT;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.commit()
            query = "BEGIN TRANSACTION;"
            if self.outputRequired():
                self.output(query)
            if self.options.sql:
                self.conn.execute(query)
        else:
            query = "SET autocommit=0;"
            if self.outputRequired():
                self.output(query)

    def db_create_database(self):
        "Emit CREATE DATABASE/USE statements (text output only - not SQLite)."
        query = "CREATE DATABASE IF NOT EXISTS " + self.dbname + ";"
        if self.outputRequired():
            self.output(query)
        query = "USE " + self.dbname + ";"
        if self.outputRequired():
            self.output(query)

    def db_create_process(self):
        "Create the process table (DROP is emitted to text output only)."
        query = "DROP TABLE IF EXISTS process;"
        if self.outputRequired():
            self.output(query)
        query = "CREATE TABLE process (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, pid INT NOT NULL, " \
                "startTime DATETIME NOT NULL,endTime DATETIME NULL, computedLapse FLOAT NULL,completedLapse FLOAT NULL, " \
                "user TEXT NOT NULL, workspace TEXT NOT NULL, ip TEXT NOT NULL, app TEXT NOT NULL, cmd TEXT NOT NULL, " \
                "args TEXT NULL, uCpu INT NULL, sCpu INT NULL, diskIn INT NULL, diskOut INT NULL, ipcIn INT NULL, " \
                "ipcOut INT NULL, maxRss INT NULL, pageFaults INT NULL, rpcMsgsIn INT NULL, rpcMsgsOut INT NULL, " \
                "rpcSizeIn INT NULL, rpcSizeOut INT NULL, rpcHimarkFwd INT NULL, rpcHimarkRev INT NULL, " \
                "rpcSnd FLOAT NULL, rpcRcv FLOAT NULL, running INT NULL, " \
                "error TEXT NULL, PRIMARY KEY (processkey, lineNumber));"
        if self.outputRequired():
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute(query)
            except sqlite3.OperationalError:
                # Table already exists (we deliberately don't drop it in
                # SQLite mode) - keep the existing data.
                pass

    def db_create_tableUse(self):
        "Drop and recreate the tableUse table."
        # NOTE(review): unlike the process table above, the DROP here IS
        # executed against SQLite - confirm whether that asymmetry is intended.
        query = "DROP TABLE IF EXISTS tableUse;"
        if self.outputRequired():
            self.output(query)
        if self.options.sql:
            self.cursor.execute(query)
        query = "CREATE TABLE tableUse (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, " \
                "tableName VARCHAR(255) NOT NULL, pagesIn INT NULL, pagesOut INT NULL, pagesCached INT NULL, " \
                "readLocks INT NULL, writeLocks INT NULL, getRows INT NULL, posRows INT NULL, scanRows INT NULL, " \
                "putRows int NULL, delRows INT NULL, totalReadWait INT NULL, totalReadHeld INT NULL, " \
                "totalWriteWait INT NULL, totalWriteHeld INT NULL, maxReadWait INT NULL, maxReadHeld INT NULL, " \
                "maxWriteWait INT NULL, maxWriteHeld INT NULL, PRIMARY KEY (processkey, lineNumber, tableName));"
        if self.outputRequired():
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute(query)
            except sqlite3.OperationalError:
                # Table already exists - keep it.
                pass

    def addCommand(self, cmd, vtrack):
        """Register a (possibly partial) command record, merging duplicates.

        vtrack is True when the record came from track output rather than a
        command start line.
        """
        cmd.running = self.running
        # If we are re-using pid then process previous command
        # Or process 'rmt-FileFetch' because they don't have 'completed' records
        if cmd.pid in self.cmds:
            if cmd.processKey != self.cmds[cmd.pid].processKey:
                self.sql_process_insert(self.cmds[cmd.pid])
                self.cmds[cmd.pid] = cmd # replace
            elif cmd.name == 'rmt-FileFetch' and not vtrack:
                self.sql_process_insert(self.cmds[cmd.pid])
                self.cmds[cmd.pid] = cmd # replace
            else:
                self.cmds[cmd.pid].updateFrom(cmd)
        else:
            self.cmds[cmd.pid] = cmd
            self.running += 1
        self.processCompletedCommands()

    def processCompletedCommands(self):
        """Remove any commands with completed state - should only be called by addCommand when
        we are adding a new command - at that point we know there are no new records to come for any previous
        ones as even track output will have been processed"""
        # Iterate over a snapshot of the keys: deleting from the dict while
        # iterating .keys() raises RuntimeError on Python 3.
        for pid in list(self.cmds.keys()):
            if self.cmds[pid].completed:
                self.sql_process_insert(self.cmds[pid])
                del self.cmds[pid]
                self.running -= 1

    def updateComputedTime(self, pid, computedLapse):
        "Accumulate 'compute end' lapse values for the command on this pid."
        if pid in self.cmds:
            # sum all the compute values of a same command
            if self.cmds[pid].computedLapse == "NULL":
                sum = 0.0
            else:
                sum = float(self.cmds[pid].computedLapse)
            sum += float(computedLapse)
            self.cmds[pid].computedLapse = str(sum)

    def updateCompletionTime(self, pid, endTime, completedLapse):
        "Mark the command on this pid complete and record its end time/lapse."
        if pid in self.cmds:
            self.cmds[pid].completed = True
            self.cmds[pid].endTime = endTime
            self.cmds[pid].completedLapse = str(float(completedLapse))

    def processTrackRecords(self, cmd, lines):
        """Parse the '--- ...' track lines following a command record,
        filling in usage/rpc/error fields on cmd and per-table statistics."""
        self.tables = {}
        tableName = None
        # Use line number from original cmd if appropriate
        if cmd.pid in self.cmds and cmd.processKey == self.cmds[cmd.pid].processKey:
            cmd.lineNumber = self.cmds[cmd.pid].lineNumber
        for line in lines:
            if not RE_TRACK.match(line):
                break
            gotMatch = False
            match = RE_TRACK_LAPSE.match(line)
            if match:
                gotMatch = True
                cmd.completedLapse = match.group(1)
            if not gotMatch:
                match = RE_FAILED_AUTH.match(line)
                if match:
                    gotMatch = True  # was missing - remaining patterns were tried needlessly
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_KILLED_BY.match(line)
                if match:
                    gotMatch = True
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_EXITED.match(line)
                if match:
                    gotMatch = True
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_TRACK_USAGE.match(line)
                if match:
                    gotMatch = True
                    cmd.setUsage(match.group(1), match.group(2), match.group(3), match.group(4),
                                 match.group(5), match.group(6), match.group(7), match.group(8))
            # RPC2 (with snd/rcv) must be tried before the shorter RPC form.
            if not gotMatch:
                match = RE_TRACK_RPC2.match(line)
                if match:
                    gotMatch = True
                    cmd.setRpc(match.group(1), match.group(2), match.group(3),
                               match.group(4), match.group(5), match.group(6),
                               match.group(7), match.group(8))
            if not gotMatch:
                match = RE_TRACK_RPC.match(line)
                if match:
                    gotMatch = True
                    cmd.setRpc(match.group(1), match.group(2), match.group(3),
                               match.group(4), match.group(5), match.group(6))
            if not gotMatch:
                match = RE_TRACK_TABLE.match(line)
                if match:
                    gotMatch = True
                    tableName = match.group(1)
                    self.tables[tableName] = Table(cmd.processKey,
                                                   cmd.lineNumber, tableName)
            if not gotMatch:
                match = RE_TRACK_PAGES.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setPages(match.group(1), match.group(2), match.group(3))
            if not gotMatch:
                match = RE_TRACK_LOCKS_ROWS.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setLocksRows(match.group(1), match.group(2),
                                                            match.group(3), match.group(4),
                                                            match.group(5), match.group(6),
                                                            match.group(7))
            if not gotMatch:
                match = RE_TRACK_TOTAL_LOCK.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setTotalLock(match.group(1), match.group(2),
                                                            match.group(3), match.group(4))
                        # The max is only reported if there is more than one
                        # lock taken on the table
                        if (self.tables[tableName].readLocks == "NULL" or
                                int(self.tables[tableName].readLocks) < 2) and \
                                (self.tables[tableName].writeLocks == "NULL" or
                                 int(self.tables[tableName].writeLocks) < 2):
                            self.tables[tableName].setMaxLock(match.group(1), match.group(2),
                                                              match.group(3), match.group(4))
            if not gotMatch:
                match = RE_TRACK_MAX_LOCK.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setMaxLock(match.group(1), match.group(2),
                                                          match.group(3), match.group(4))
        if cmd.completedLapse != "NULL":
            cmd.endTime = dateAdd(cmd.startTime, int(cmd.completedLapse))
        else:
            cmd.endTime = cmd.startTime
        self.addCommand(cmd, True)
        try:
            if python3:
                for k, v in self.tables.items():
                    self.sql_tableUse_insert(v)
            else:
                for table in self.tables.values():
                    self.sql_tableUse_insert(table)
        except Exception as e:
            print("%s, lineNo %d, cmd lineNo %d, %s, %s, %s" % (str(e), self.lineNo, cmd.lineNumber, cmd.processKey, cmd.name, cmd.args))

    def flush_output(self):
        "Write out all commands still pending in self.cmds."
        if python3:
            for p, v in self.cmds.items():
                self.sql_process_insert(v)
        else:
            for p, v in self.cmds.iteritems():
                self.sql_process_insert(v)
        self.output("", file=sys.stderr)

    def sql_process_insert(self, cmd):
        "Insert one command row into the process table (text and/or SQLite)."
        if self.outputRequired():
            query = 'INSERT IGNORE INTO process VALUES ("%s",%d,%d,%s,%s,%s,%s,"%s","%s","%s","%s","%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%d,%s);' % \
                    (cmd.processKey, cmd.lineNumber, cmd.pid,
                     cmd.startTime, cmd.endTime, cmd.computedLapse,
                     cmd.completedLapse, cmd.user, cmd.workspace,
                     cmd.ip, cmd.app, cmd.name, cmd.args, cmd.rpcMsgsIn,
                     cmd.rpcMsgsOut, cmd.rpcSizeIn, cmd.rpcSizeOut,
                     cmd.rpcHimarkFwd, cmd.rpcHimarkRev, cmd.rpcSnd, cmd.rpcRcv, cmd.uCpu,
                     cmd.sCpu, cmd.diskIn, cmd.diskOut, cmd.ipcIn, cmd.ipcOut,
                     cmd.maxRss, cmd.pageFaults, cmd.running, cmd.error)
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute('INSERT INTO process VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
                                    (cmd.processKey, cmd.lineNumber, cmd.pid,
                                     cmd.startTime, nullValue(cmd.endTime), nullValue(cmd.computedLapse),
                                     nullValue(cmd.completedLapse), nullValue(cmd.user), nullValue(cmd.workspace),
                                     nullValue(cmd.ip), nullValue(cmd.app), nullValue(cmd.name), nullValue(cmd.args),
                                     nullValue(cmd.rpcMsgsIn),
                                     nullValue(cmd.rpcMsgsOut), nullValue(cmd.rpcSizeIn), nullValue(cmd.rpcSizeOut),
                                     nullValue(cmd.rpcHimarkFwd), nullValue(cmd.rpcHimarkRev),
                                     nullValue(cmd.rpcSnd), nullValue(cmd.rpcRcv), nullValue(cmd.uCpu),
                                     nullValue(cmd.sCpu), nullValue(cmd.diskIn), nullValue(cmd.diskOut),
                                     nullValue(cmd.ipcIn), nullValue(cmd.ipcOut),
                                     nullValue(cmd.maxRss), nullValue(cmd.pageFaults), nullValue(cmd.running),
                                     nullValue(cmd.error)))
            except Exception as e:
                # Re-raise with enough context to find the offending log record.
                raise Exception("%s: %s, %d, %d, %s" % (str(e), cmd.processKey, cmd.lineNumber, cmd.pid, cmd.name))
        self.db_commit_updates()

    def sql_tableUse_insert(self, tab):
        "Insert one table-usage row into the tableUse table (text and/or SQLite)."
        if self.outputRequired():
            query = 'INSERT IGNORE INTO tableUse VALUES ("%s",%d,"%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \
                    '%s,%s,%s,%s);' % \
                    (tab.processKey, tab.lineNumber, tab.tableName, tab.pagesIn,
                     tab.pagesOut, tab.pagesCached, tab.readLocks,
                     tab.writeLocks, tab.getRows, tab.posRows, tab.scanRows,
                     tab.putRows, tab.delRow,
                     tab.totalReadWait, tab.totalReadHeld,
                     tab.totalWriteWait, tab.totalWriteHeld,
                     tab.maxReadWait, tab.maxReadHeld,
                     tab.maxWriteWait, tab.maxWriteHeld)
            self.output(query)
        if self.options.sql:
            try:
                self.cursor.execute('INSERT INTO tableUse VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
                                    (tab.processKey, tab.lineNumber, tab.tableName,
                                     nullValue(tab.pagesIn), nullValue(tab.pagesOut), nullValue(tab.pagesCached),
                                     nullValue(tab.readLocks), nullValue(tab.writeLocks), nullValue(tab.getRows), nullValue(tab.posRows),
                                     nullValue(tab.scanRows), nullValue(tab.putRows), nullValue(tab.delRow),
                                     nullValue(tab.totalReadWait), nullValue(tab.totalReadHeld),
                                     nullValue(tab.totalWriteWait), nullValue(tab.totalWriteHeld),
                                     nullValue(tab.maxReadWait), nullValue(tab.maxReadHeld),
                                     nullValue(tab.maxWriteWait), nullValue(tab.maxWriteHeld)))
            except Exception as e:
                # Re-raise with enough context to find the offending log record.
                raise Exception("%s: %s, %d, %s" % (str(e), tab.processKey, tab.lineNumber, tab.tableName))
        self.db_commit_updates()

    def processInfoBlock(self, block):
        "Parse one 'Perforce server info:' block - command/compute/completed/track lines."
        cmd = None
        i = 0
        # First line of block is info line - process the rest
        for line in block.lines[1:]:
            i += 1
            # Check for track lines and once we have found one, process them all and finish
            if cmd:
                match = RE_TRACK.match(line)
                if match:
                    self.processTrackRecords(cmd, block.lines[i:])
                    break # Block has been processed
            match = RE_CMD_NOARG.match(line)
            if not match:
                match = RE_CMD.match(line)
            if match:
                pid = int(match.group(2))
                startTime = parseDateTime(match.group(1))
                user = match.group(3)
                workspace = match.group(4)
                ip = match.group(5)
                # following gsub required due to a 2009.2 P4V bug
                app = match.group(6).replace("\x00", "/")
                name = match.group(7)
                cmdArgs = None
                if len(match.groups()) == 8:
                    cmdArgs = match.group(8)
                    # Strip Swarm/Git Fusion commands with lots of json
                    smatch = re.search(RE_JSON_CMDARGS, cmdArgs)
                    if smatch:
                        cmdArgs = smatch.group(1)
                m = hashlib.md5()
                line = re.sub(r'[^\x00-\x7F]', '?', line) # Do this so we can compare results with ruby version
                if python3:
                    m.update(line.encode())
                else:
                    m.update(line)
                processKey = m.hexdigest()
                cmd = Command(processKey, block.lineNo, pid, startTime,
                              user, workspace, ip, app, name, cmdArgs)
                self.addCommand(cmd, False)
                continue
            # Pattern matching a completed line
            match = RE_COMPLETED.match(line)
            if match:
                pid = int(match.group(2))
                endTime = parseDateTime(match.group(1))
                completedLapse = match.group(3)
                self.updateCompletionTime(pid, endTime, completedLapse)
                continue
            # Pattern matching a compute line
            match = RE_COMPUTE.match(line)
            if match:
                pid = int(match.group(2))
                computedLapse = float(match.group(3))
                self.updateComputedTime(pid, computedLapse)
                continue

    def blockEnd(self, line):
        "Blank line or one of terminators"
        # A comma was previously missing after "Perforce server error:",
        # silently concatenating two terminator strings into one that
        # never matched.
        terminators = ["Perforce server info:",
                       "Perforce server error:",
                       "locks acquired by blocking after",
                       "Rpc himark:"]
        if line == "" or line == "\n" or line == "\r\n":
            return True
        for t in terminators:
            if line[:len(t)] == t:
                return True
        return False

    def blockInfo(self, line):
        "True when the line starts a 'Perforce server info:' block."
        t = "Perforce server info:"
        if line[:len(t)] == t:
            return True
        return False

    def processLog(self):
        "Main loop: split the log into blocks and process each info block."
        block = Block()
        line = self.readLogfile()
        while line:
            if self.blockEnd(line):
                if block.lines:
                    if self.blockInfo(block.lines[0]):
                        self.processInfoBlock(block)
                    # TODO: process errors etc
                block = Block()
                block.addLine(line, self.lineNo)
            else:
                block.addLine(line, self.lineNo)
            line = self.readLogfile()
        if block.lines:
            if self.blockInfo(block.lines[0]):
                self.processInfoBlock(block)
        self.terminate()
######################
# START OF MAIN SCRIPT
######################
# Regular expressions for the log line formats.  All are used with .match()
# so the anchors apply to the start of the line.
RE_INFO = re.compile('^Perforce server info:')
RE_EMPTY = re.compile('^\s*$')
# Strips the trailing json payload appended by Swarm / Git Fusion commands.
RE_JSON_CMDARGS = re.compile('^(.*) \{.*\}$')
# Command start line: date, pid, user@workspace, ip, [app], 'cmd args'
RE_CMD = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+) (.*)\'.*')
# As RE_CMD but for commands logged without arguments (7 groups, not 8).
RE_CMD_NOARG = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+)\'.*')
RE_COMPUTE = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) compute end ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*')
RE_COMPLETED = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) completed ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*')
# Any vtrack output line.
RE_TRACK = re.compile('^---|^locks acquired by blocking after 3 non-blocking attempts')
# NOTE(review): only captures whole seconds - a lapse such as '2.375s'
# would be recorded as 2.  Confirm against real p4d track output.
RE_TRACK_LAPSE = re.compile('^--- lapse (\d+)')
RE_TRACK_RPC = re.compile('^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+)')
# Longer rpc variant with snd/rcv timings - must be tried before RE_TRACK_RPC.
RE_TRACK_RPC2 = re.compile('^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+) snd/rcv ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s/([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s')
RE_TRACK_USAGE = re.compile('^--- usage (\d+)\+(\d+)us (\d+)\+(\d+)io (\d+)\+(\d+)net (\d+)k (\d+)pf')
RE_FAILED_AUTH = re.compile('^--- (failed authentication check)')
RE_KILLED_BY = re.compile('^--- (killed by .*)')
RE_EXITED = re.compile('^--- (exited on fatal server error)')
# NOTE(review): the '.' after 'db' is an unescaped any-character match.
RE_TRACK_TABLE = re.compile('^--- db.([a-zA-Z]*)')
RE_TRACK_PAGES = re.compile('^--- pages in\+out\+cached (\d+)\+(\d+)\+(\d+)')
RE_TRACK_LOCKS_ROWS = re.compile(
    '^--- locks read/write (\d+)/(\d+) rows get\+pos\+scan put\+del (\d+)\+(\d+)\+(\d+) (\d+)\+(\d+)')
RE_TRACK_TOTAL_LOCK = re.compile('^--- total lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')
# NOTE(review): in the second alternative 'wait+held' has an unescaped '+',
# and its capture groups are (5)-(8) while the caller (setMaxLock) reads
# groups (1)-(4) - confirm whether that branch ever matches real log lines.
RE_TRACK_MAX_LOCK = re.compile(
    '^--- max lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms|--- locks wait+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')
def main():
    """Parse command line arguments and run the log conversion.

    Requires at least one of -o/--output (SQL text) or -s/--sql (SQLite).
    """
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('logfile', nargs=1, help='log file to process')
    parser.add_argument('-d', '--dbname', help="Database name to use", default=None)
    parser.add_argument('-o', '--output', help="Name of file to print SQL to (if not specified then no output)", default=None)
    parser.add_argument('-s', '--sql', help="Use local SQL Lite database", action='store_true', default=False)
    # Let argparse terminate the process itself on bad arguments or -h.
    # The previous bare 'except:' also caught SystemExit(0) raised by
    # --help and turned it into an error exit code of 1.
    options = parser.parse_args()
    if not options.output and not options.sql:
        print("Please specify either an output file or -s/--sql")
        parser.print_help()
        sys.exit(1)
    log2sql = Log2sql(options)
    log2sql.processLog()

if __name__ == '__main__':
    main()
| # | Change | User | Description | Committed | |
|---|---|---|---|---|---|
| #5 | 23765 | Robert Cowham | Moved things down one level to psla dir to make it easier to see what belongs | ||
| #4 | 23719 | Robert Cowham | Log warnings for insert exceptions | ||
| #3 | 23683 | Robert Cowham | Bring in latest changes - performance improvements | ||
| #2 | 23636 | Robert Cowham |
Merge in latests changes to log2sql.py - much faster - better at detecting completed commands and out of order vtrack - full test harness |
||
| #1 | 22465 | Robert Cowham |
Added tests for r99.2 created depot (no filesize/digest stored) Fixed problem with preview failing comparison Add --p4d option to tests to use different p4d version |
||
| //guest/robert_cowham/perforce/utils/log_analysis/log2sql.py | |||||
| #22 | 21796 | Robert Cowham | Handle rmt-FileFetch records which may be duplicates as they don't have 'completed' entries. | ||
| #21 | 21780 | Robert Cowham |
Don't drop tables when creating sqlite db Performance tweaks after profiling - particularly date parsing. |
||
| #20 | 21713 | Robert Cowham | Ignore temporary error inserting into trackuse table | ||
| #19 | 21712 | Robert Cowham | Check for correct line number when looking to update for track entries | ||
| #18 | 21708 | Robert Cowham |
Tweak parameters to improve error messages. Make output to stdout optional |
||
| #17 | 21705 | Robert Cowham |
Minor refactoring for clarity. Process all completed records as we go. |
||
| #16 | 21702 | Robert Cowham |
Rework algorithm to process blocks at a time, and to avoid the SQL update statements. Risk of increased memory vs improved speed. |
||
| #15 | 21697 | Robert Cowham | Parse rcpSnd/rpcRcv and add to database. | ||
| #14 | 21694 | Robert Cowham | Handle null values properly | ||
| #13 | 21672 | Robert Cowham | Improve performance when inserting rows | ||
| #12 | 21660 | Robert Cowham | Handle utf8 in python2 strings | ||
| #11 | 21656 | Robert Cowham | Avoid SQL quoting issues with alternative insert statement. | ||
| #10 | 21643 | Robert Cowham | Handle git-fusion entries (strip the json) - for --sql option | ||
| #9 | 21402 | Robert Cowham |
Filter out swarm commands Test for db content in a basic fashion. |
||
| #8 | 21393 | Robert Cowham |
Add a swarm test - large record. Improve performance by skipping easy matches early. |
||
| #7 | 21378 | Robert Cowham | Batch updates and make SQL date arithmetic work with SQLite | ||
| #6 | 21374 | Robert Cowham | Write to sqllite db | ||
| #5 | 21368 | Robert Cowham | Make tests work under python3 | ||
| #4 | 21367 | Robert Cowham | Restructured for unit tests, and first unit tests. | ||
| #3 | 21366 | Robert Cowham | Fix problems with NULL values and int | ||
| #2 | 21365 | Robert Cowham | Equivalent of Ruby version on smaller sets | ||
| #1 | 21361 | Robert Cowham | Sort of works... | ||