/*
Package p4dlogparse parses Perforce text logs (not structured logs).
It assumes you have set configurable server=3 (or greater)
You may also have decided to set track=1 to get more detailed usage of
access to different tables.
See p4dlogparse_test.go for examples of log entries.
*/
package p4dlogparse
import (
"bufio"
"bytes"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"log"
"os"
"regexp"
"strconv"
"strings"
"time"
jsoniter "github.com/json-iterator/go"
)
// Ref format: Mon Jan 2 15:04:05 -0700 MST 2006
const p4timeformat = "2006/01/02 15:04:05"
var reCmd = regexp.MustCompile(`^\t(\d\d\d\d/\d\d/\d\d \d\d:\d\d:\d\d) pid (\d+) ([^ @]*)@([^ ]*) ([^ ]*) \[([^\]]*)\] \'([\w-]+) (.*)\'.*`)
var reCmdNoarg = regexp.MustCompile(`^\t(\d\d\d\d/\d\d/\d\d \d\d:\d\d:\d\d) pid (\d+) ([^ @]*)@([^ ]*) ([^ ]*) \[([^\]]*)\] \'([\w-]+)\'.*`)
var reCompute = regexp.MustCompile(`^\t(\d\d\d\d/\d\d/\d\d \d\d:\d\d:\d\d) pid (\d+) compute end ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*`)
var reCompleted = regexp.MustCompile(`^\t(\d\d\d\d/\d\d/\d\d \d\d:\d\d:\d\d) pid (\d+) completed ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*`)
var reJSONCmdargs = regexp.MustCompile(`^(.*) \{.*\}$`)
func toInt64(buf []byte) (n int64) {
for _, v := range buf {
n = n*10 + int64(v-'0')
}
return
}
// P4dParseOptions - Options for parsing - mainly for command line usage
type P4dParseOptions struct {
File string
testInput string // For testing only
}
// Block is a block of lines parsed from a file
type Block struct {
lineNo int64
lines [][]byte
}
func (block *Block) addLine(line []byte, lineNo int64) {
block.lines = append(block.lines, line)
if block.lineNo == 0 {
block.lineNo = lineNo
}
}
// Command is a command found in the block
type Command struct {
ProcessKey string `json:"processKey"`
Cmd []byte `json:"cmd"`
Pid int64 `json:"pid"`
LineNo int64 `json:"lineNo"`
User []byte `json:"user"`
Workspace []byte `json:"workspace"`
StartTime time.Time `json:"startTime"`
EndTime time.Time `json:"endTime"`
ComputeLapse []byte `json:"computeLapse"`
CompletedLapse []byte `json:"completedLapse"`
IP []byte `json:"ip"`
App []byte `json:"app"`
Args []byte `json:"args"`
duplicateKey bool
completed bool
hasTrackInfo bool
running int
}
func (c *Command) String() string {
var json = jsoniter.ConfigCompatibleWithStandardLibrary
j, _ := json.Marshal(c)
return string(j)
}
func (c *Command) setStartTime(t []byte) {
c.StartTime, _ = time.Parse(p4timeformat, string(t))
}
func (c *Command) setEndTime(t []byte) {
c.EndTime, _ = time.Parse(p4timeformat, string(t))
}
// MarshalJSON - handle time formatting
func (c *Command) MarshalJSON() ([]byte, error) {
return json.Marshal(&struct {
ProcessKey string `json:"processKey"`
Cmd string `json:"cmd"`
Pid int64 `json:"pid"`
LineNo int64 `json:"lineNo"`
User string `json:"user"`
Workspace string `json:"workspace"`
ComputeLapse string `json:"computeLapse"`
CompletedLapse string `json:"completedLapse"`
IP string `json:"ip"`
App string `json:"app"`
Args string `json:"args"`
StartTime string `json:"startTime"`
EndTime string `json:"endTime"`
}{
ProcessKey: c.ProcessKey,
Cmd: string(c.Cmd),
Pid: c.Pid,
LineNo: c.LineNo,
User: string(c.User),
Workspace: string(c.Workspace),
ComputeLapse: string(c.ComputeLapse),
CompletedLapse: string(c.CompletedLapse),
IP: string(c.IP),
App: string(c.App),
Args: string(c.Args),
StartTime: c.StartTime.Format(p4timeformat),
EndTime: c.EndTime.Format(p4timeformat),
})
}
func (c *Command) updateFrom(cmd *Command) {
// Do Nothing
}
// P4dOutputCallback is function to output JSON objects
type P4dOutputCallback func(output string)
// P4dFileParser manages state
type P4dFileParser struct {
lineNo int64
cmds map[int64]*Command
output P4dOutputCallback
currStartTime time.Time
pidsSeenThisSecond map[int64]bool
running int
}
// NewP4dFileParser - create and initialise properly
func NewP4dFileParser(callback P4dOutputCallback) *P4dFileParser {
var fp P4dFileParser
fp.cmds = make(map[int64]*Command)
fp.pidsSeenThisSecond = make(map[int64]bool)
fp.output = callback
return &fp
}
func (fp *P4dFileParser) addCommand(newCmd *Command, hasTrackInfo bool) {
newCmd.running = fp.running
if fp.currStartTime != newCmd.StartTime && newCmd.StartTime.After(fp.currStartTime) {
fp.currStartTime = newCmd.StartTime
fp.pidsSeenThisSecond = make(map[int64]bool)
}
if cmd, ok := fp.cmds[newCmd.Pid]; ok {
if cmd.ProcessKey == newCmd.ProcessKey {
fp.outputCmd(cmd)
fp.cmds[newCmd.Pid] = newCmd // Replace previous cmd with same PID
} else if bytes.Equal(newCmd.Cmd, []byte("rmt-FileFetch")) ||
bytes.Equal(newCmd.Cmd, []byte("rmt-Journal")) ||
bytes.Equal(newCmd.Cmd, []byte("pull")) {
if hasTrackInfo {
cmd.updateFrom(newCmd)
} else {
fp.outputCmd(cmd)
newCmd.duplicateKey = true
fp.cmds[newCmd.Pid] = newCmd // Replace previous cmd with same PID
}
} else {
cmd.updateFrom(newCmd)
}
} else {
fp.cmds[newCmd.Pid] = newCmd
if _, ok := fp.pidsSeenThisSecond[newCmd.Pid]; ok {
newCmd.duplicateKey = true
}
fp.pidsSeenThisSecond[newCmd.Pid] = true
fp.running++
fp.outputCompletedCommands()
}
}
// Output a single command to callback
func (fp *P4dFileParser) outputCmd(cmd *Command) {
if fp.output != nil {
lines := []string{}
lines = append(lines, fmt.Sprintf("%v", cmd))
if len(lines) > 0 && len(lines[0]) > 0 {
fp.output(strings.Join(lines, `\n`))
}
}
}
// Output all completed commands 3 or more seconds ago
func (fp *P4dFileParser) outputCompletedCommands() {
for _, cmd := range fp.cmds {
completed := false
if cmd.completed && (cmd.hasTrackInfo || fp.currStartTime.Sub(cmd.EndTime) >= 3*time.Second) {
completed = true
}
if !completed && (cmd.hasTrackInfo && fp.currStartTime.Sub(cmd.EndTime) >= 3*time.Second) {
completed = true
}
if completed {
fp.outputCmd(cmd)
delete(fp.cmds, cmd.Pid)
fp.running--
}
}
}
// Processes all remaining commands whether completed or not - intended for use at end
func (fp *P4dFileParser) outputRemainingCommands() {
for _, cmd := range fp.cmds {
fp.outputCmd(cmd)
}
fp.cmds = make(map[int64]*Command)
}
func (fp *P4dFileParser) updateComputeTime(pid int64, computeLapse []byte) {
sum := 0.0
if cmd, ok := fp.cmds[pid]; ok {
// sum all compute values for same command
sum, _ = strconv.ParseFloat(string(cmd.ComputeLapse), 32)
f, _ := strconv.ParseFloat(string(computeLapse), 32)
cmd.ComputeLapse = []byte(strconv.FormatFloat(sum+f, 'f', -1, 32))
}
}
func (fp *P4dFileParser) updateCompletionTime(pid int64, endTime []byte, completedLapse []byte) {
if cmd, ok := fp.cmds[pid]; ok {
cmd.CompletedLapse = endTime
cmd.setEndTime(endTime)
f, _ := strconv.ParseFloat(string(completedLapse), 32)
cmd.CompletedLapse = []byte(strconv.FormatFloat(f, 'f', -1, 32))
}
}
func (fp *P4dFileParser) processInfoBlock(block *Block) {
var cmd *Command
i := 0
for _, line := range block.lines[1:] {
i++
m := reCmd.FindSubmatch(line)
if len(m) == 0 {
m = reCmdNoarg.FindSubmatch(line)
}
if len(m) > 0 {
cmd = new(Command)
cmd.LineNo = block.lineNo
cmd.setStartTime(m[1])
cmd.Pid = toInt64(m[2])
cmd.User = m[3]
cmd.Workspace = m[4]
cmd.IP = m[5]
cmd.App = m[6]
cmd.Cmd = m[7]
// # following gsub required due to a 2009.2 P4V bug
// App = match.group(6).replace("\x00", "/")
if len(m) > 8 {
cmd.Args = m[8]
// Strip Swarm/Git Fusion commands with lots of json
sm := reJSONCmdargs.FindSubmatch(cmd.Args)
if len(sm) > 0 {
cmd.Args = sm[1]
}
}
h := md5.Sum(line)
cmd.ProcessKey = hex.EncodeToString(h[:])
fp.addCommand(cmd, false)
} else {
// process completed and computed
m := reCompleted.FindSubmatch(line)
if len(m) > 0 {
endTime := m[1]
pid := toInt64(m[2])
completedLapse := m[3]
fp.updateCompletionTime(pid, endTime, completedLapse)
} else {
m := reCompute.FindSubmatch(line)
if len(m) > 0 {
pid := toInt64(m[2])
computeLapse := m[3]
fp.updateComputeTime(pid, computeLapse)
}
}
}
}
}
func blankLine(line []byte) bool {
return len(line) == 0
}
var blockEnds = [][]byte{[]byte("Perforce server info:"), []byte("Perforce server error:"),
[]byte("locks acquired by blocking after"), []byte("Rpc himark:")}
func blockEnd(line []byte) bool {
if blankLine(line) {
return true
}
for _, str := range blockEnds {
if bytes.Equal(line, str) {
return true
}
}
return false
}
// P4LogParseFile - interface for parsing a specified file
func (fp *P4dFileParser) P4LogParseFile(opts P4dParseOptions) {
var scanner *bufio.Scanner
if len(opts.testInput) > 0 {
scanner = bufio.NewScanner(strings.NewReader(opts.testInput))
} else if opts.File == "-" {
scanner = bufio.NewScanner(os.Stdin)
} else {
file, err := os.Open(opts.File)
if err != nil {
log.Fatal(err)
}
defer file.Close()
reader := bufio.NewReaderSize(file, 1024*1024) // Read in chunks
scanner = bufio.NewScanner(reader)
}
fp.lineNo = 0
infoBlock := []byte("Perforce server info:")
block := new(Block)
for scanner.Scan() {
line := scanner.Bytes()
if blockEnd(line) {
if len(block.lines) > 0 {
if bytes.Equal(block.lines[0], infoBlock) {
fp.processInfoBlock(block)
} else if blankLine(block.lines[0]) {
fp.outputCompletedCommands()
}
}
block = new(Block)
block.addLine(line, fp.lineNo)
} else {
block.addLine(line, fp.lineNo)
}
fp.lineNo++
}
if len(block.lines) > 0 {
if bytes.Equal(block.lines[0], infoBlock) {
fp.processInfoBlock(block)
}
}
fp.outputRemainingCommands()
if err := scanner.Err(); err != nil {
fmt.Fprintf(os.Stderr, "reading file %s:%s\n", opts.File, err)
}
}