#! /usr/bin/env python
# $Id: //guest/thomas_quinot/perforce/utils/metrics/changemetrics.py#1 $
##
## Written by Scott Pasnikowski around the time of 4/22/99
## @symantec corp. Home of the Norton utilities
##
## Under duress of pesky QA people...
##
## This comes with no guarantee whatsoever... on any level.
##
## Consider this to be under the linux type license thingy
## ( Don't recall what it's called GNU or copyleft or whatever )
## and if anyone complains my manager du jour said I could give it away.
## ( he really did )
##
import sys, os, string, re
# Shared pattern that splits a depot path such as //depot/dir/file.c into
# ( directory part, file name ).  The first group is greedy, so the split
# happens at the last '/'.
# NOTE(review): no function in this file appears to read this module-level
# name -- confirm before removing.
expFileNameField = re.compile( r'^(//.+)/(.+)' )
# This func corresponds to p4 diff2 with the -dn switch
def extract_metrics_counts_rcs_filetofile( depot_file_and_rev1, depot_file_and_rev2, run_command=None ):
    """Count added and deleted lines between two revisions using `p4 diff2 -dn`.

    The RCS-style output is a sequence of edit commands, for example
    `a85 2` (2 lines added after line 85) or `d12 3` (3 lines deleted
    starting at line 12).  Each `a` command is followed by the text of the
    added lines; that text is skipped so that content which merely looks
    like a command (or any other text starting with 'a' or 'd') is never
    counted.

    depot_file_and_rev1, depot_file_and_rev2 -- file specs handed to p4,
        each wrapped in double quotes on the command line.
    run_command -- optional callable that takes the command string and
        returns an iterable of output lines.  When omitted the command is
        run through os.popen.

    Returns the tuple ( lines_added, lines_deleted ).
    """
    if run_command is None:
        def run_command( command ):
            # Read everything, then close the pipe explicitly instead of
            # leaving it to the garbage collector.
            pipe = os.popen( command, 'r' )
            try:
                return pipe.readlines()
            finally:
                pipe.close()

    command = 'p4 diff2 -dn "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'

    # sample line: a85 2
    #              operation, (line)location, (number of lines affected)count
    edit_command = re.compile( r'^([ad])[0-9]+ (\d+)' )

    lines_added = 0
    lines_deleted = 0
    text_lines_to_skip = 0
    for line in run_command( command ):
        if text_lines_to_skip:
            # Still inside the text that belongs to the previous 'a' command.
            text_lines_to_skip -= 1
            continue
        found = edit_command.match( line )
        if not found:
            # Header line or anything else that is not an edit command.
            continue
        operation, count = found.group( 1 ), int( found.group( 2 ) )
        if operation == 'a':
            lines_added += count
            text_lines_to_skip = count
        else:
            lines_deleted += count
    return lines_added, lines_deleted
# This func corresponds to p4 diff2 with the -dc switch
def extract_metrics_counts_context_filetofile( depot_file_and_rev1, depot_file_and_rev2, run_command=None ):
    """Count added, changed and deleted lines using `p4 diff2 -dc`.

    Lines of the context diff are classified by their first character:
    '+' is an added line, '-' a deleted line and '!' a changed line.  The
    range marker that opens the new-file half of every hunk
    ( `--- 1,4 ----` ) also begins with '-', so it is recognised and left
    out of the deleted count.

    NOTE: every '!' line in the output is counted; a context diff lists a
    modified line in both halves of a hunk.

    depot_file_and_rev1, depot_file_and_rev2 -- file specs handed to p4,
        each wrapped in double quotes on the command line.
    run_command -- optional callable that takes the command string and
        returns an iterable of output lines.  When omitted the command is
        run through os.popen.

    Returns the tuple ( lines_added, lines_changed, lines_deleted ).
    """
    if run_command is None:
        def run_command( command ):
            # Read everything, then close the pipe explicitly.
            pipe = os.popen( command, 'r' )
            try:
                return pipe.readlines()
            finally:
                pipe.close()

    command = 'p4 diff2 -dc "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'

    # Matches the "--- start,end ----" hunk range marker, which is not a
    # deleted line even though it starts with '-'.
    new_side_range = re.compile( r'^--- \d+(,\d+)? ----' )

    lines_added = 0
    lines_changed = 0
    lines_deleted = 0
    for line in run_command( command ):
        marker = line[0:1]
        if marker == '+':
            lines_added += 1
        elif marker == '!':
            lines_changed += 1
        elif marker == '-' and not new_side_range.match( line ):
            lines_deleted += 1
    return lines_added, lines_changed, lines_deleted
def extract_metrics_counts_summary_filetofile( depot_file_and_rev1, depot_file_and_rev2, run_command=None ):
    """Total the chunk and line counts reported by `p4 diff2 -ds`.

    Three kinds of summary lines are recognised:
        add <chunks> chunks <lines> ...
        deleted <chunks> chunks <lines> ...
        changed <chunks> chunks <first> / <second> ...
    For a "changed" line the number before the '/' is accumulated into
    extra_data and the number after it into lines_changed.

    depot_file_and_rev1, depot_file_and_rev2 -- file specs handed to p4,
        each wrapped in double quotes on the command line.
    run_command -- optional callable that takes the command string and
        returns an iterable of output lines.  When omitted the command is
        run through os.popen.

    Returns the tuple ( lines_added, add_chunks, lines_deleted,
    delete_chunks, lines_changed, change_chunks, extra_data ).
    """
    if run_command is None:
        def run_command( command ):
            # Read everything, then close the pipe explicitly.
            pipe = os.popen( command, 'r' )
            try:
                return pipe.readlines()
            finally:
                pipe.close()

    command = 'p4 diff2 -ds "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'

    add_line = re.compile( r'^add (\d+) chunks (\d+)' )
    deleted_line = re.compile( r'^deleted (\d+) chunks (\d+)' )
    changed_line = re.compile( r'^changed (\d+) chunks (\d+) / (\d+)' )

    lines_added = add_chunks = 0
    lines_deleted = delete_chunks = 0
    lines_changed = change_chunks = 0
    extra_data = 0
    for line in run_command( command ):
        found = add_line.match( line )
        if found:
            add_chunks += int( found.group( 1 ) )
            lines_added += int( found.group( 2 ) )
            continue
        found = deleted_line.match( line )
        if found:
            delete_chunks += int( found.group( 1 ) )
            lines_deleted += int( found.group( 2 ) )
            continue
        found = changed_line.match( line )
        if found:
            change_chunks += int( found.group( 1 ) )
            extra_data += int( found.group( 2 ) )
            lines_changed += int( found.group( 3 ) )
    return lines_added, add_chunks, lines_deleted, delete_chunks, lines_changed, change_chunks, extra_data
# This func corresponds to p4 diff2 with the -du switch
def extract_metrics_counts_unified_filetofile( depot_file_and_rev1, depot_file_and_rev2, run_command=None ):
    """Count added and deleted lines using `p4 diff2 -du`.

    Every output line whose first character is '+' counts as an added line
    and every line whose first character is '-' counts as a deleted line.

    depot_file_and_rev1, depot_file_and_rev2 -- file specs handed to p4,
        each wrapped in double quotes on the command line.
    run_command -- optional callable that takes the command string and
        returns an iterable of output lines.  When omitted the command is
        run through os.popen.

    Returns the tuple ( lines_added, lines_deleted ).
    """
    if run_command is None:
        def run_command( command ):
            # Read everything, then close the pipe explicitly.
            pipe = os.popen( command, 'r' )
            try:
                return pipe.readlines()
            finally:
                pipe.close()

    command = 'p4 diff2 -du "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'

    lines_added = 0
    lines_deleted = 0
    for line in run_command( command ):
        marker = line[0:1]
        if marker == '+':
            lines_added += 1
        elif marker == '-':
            lines_deleted += 1
    return lines_added, lines_deleted
# This func generates a list of change numbers that have been
# added between the 2 points.
# It's equivalent to p4 changes //depot/some
def extract_change_numbers( depot_root, label_one, label_two, run_command=None ):
    """List the change numbers that appear at only one of two points.

    Runs `p4 changes` twice, once for depot_root + label_one and once for
    depot_root + label_two, and keeps the change numbers that appear in
    exactly one of the two listings.  A short summary is written to
    sys.stdout.

    depot_root -- depot path limiting the range to a single "project" or
        something even smaller, like a single file.
    label_one, label_two -- appended directly to depot_root with no
        separator, so they presumably carry their own revision prefix
        (e.g. '@label') -- confirm against how the script is invoked.
    run_command -- optional callable that takes the command string and
        returns an iterable of output lines.  When omitted the command is
        run through os.popen.

    Returns the change numbers as a list of strings in ascending numeric
    order.  Output lines that do not start with "Change <number>" are
    ignored.
    """
    if run_command is None:
        def run_command( command ):
            # Read everything, then close the pipe explicitly.
            pipe = os.popen( command, 'r' )
            try:
                return pipe.readlines()
            finally:
                pipe.close()

    change_line = re.compile( r'^Change (\d+)' )

    def changes_at( label ):
        # Collect the change numbers listed for depot_root at this label.
        numbers = []
        for line in run_command( 'p4 changes ' + depot_root + label ):
            found = change_line.match( line )
            if found:
                numbers.append( found.group( 1 ) )
        return numbers

    first_list = changes_at( label_one )
    second_list = changes_at( label_two )

    # A change present in both listings cancels out; what is left is the
    # set of changes that separates the two points.  Sort numerically so
    # that '20' comes before '100'.
    difflist = sorted( set( first_list ) ^ set( second_list ), key=int )

    # Lets print out some summary data
    sys.stdout.write( '\n' + repr( len( difflist ) ) + ' Changelists submitted:\n' )
    for change_number in difflist:
        sys.stdout.write( change_number + ' ' )
    sys.stdout.write( '\n\n' )
    return difflist
def gen_change_data( listof_changenums, run_command=None ):
    """Collect the files touched by each change via `p4 describe -s`.

    For every change number the describe output is scanned for
      * the header line  "Change <n> by <user> on <date> <time>", and
      * file lines       "... <depot path>#<rev> <action>".

    listof_changenums -- change numbers as strings; each is appended to
        the 'p4 describe -s ' command.
    run_command -- optional callable that takes the command string and
        returns an iterable of output lines.  When omitted the command is
        run through os.popen.

    Returns a dictionary keyed by repr() of the change number string (so
    the key text includes the quote characters).  Each value is a list of
    [ depotPath, revnum, action, user ] entries, all strings.  A change
    whose describe output contains no header line is left out.
    """
    if run_command is None:
        def run_command( command ):
            # Read everything, then close the pipe explicitly.
            pipe = os.popen( command, 'r' )
            try:
                return pipe.readlines()
            finally:
                pipe.close()

    command = 'p4 describe -s '
    change_dict = {}

    change_header = re.compile( r'^Change (\d+) by (.+) on (\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)' )
    # The leading "..." is matched literally; unescaped dots would accept
    # any three characters and let indented description text pass as a
    # file line.
    file_line = re.compile( r'^\.\.\. (.+)#(\d+) (.+)' )
    depot_path_shape = re.compile( r'^(//.+)/(.+)' )

    # for every change number do a p4 describe
    for num in listof_changenums:
        change_number = None
        user = ''
        list_of_files = []
        for line in run_command( command + num ):
            header = change_header.match( line )
            if header:
                # Aha! this is the first line that contains the user name stuff...
                change_number = header.group( 1 )
                user = header.group( 2 )
                continue
            found = file_line.match( line )
            if found:
                # This is a line listing a file that belongs to this change
                depot_path, revnum, action = found.groups()
                if depot_path_shape.match( depot_path ):
                    list_of_files.append( [ depot_path, revnum, action, user ] )

        if change_number is None:
            # No header line: nothing reliable to record for this change.
            continue

        # Add to an existing entry for this change, or start a new one.
        change_dict.setdefault( repr( change_number ), [] ).extend( list_of_files )
    return change_dict
def gen_metrics_data( data_dict ):
    """Write a per-change metrics report to sys.stdout.

    data_dict -- dictionary mapping a change label to a list of
        [ depotPath, revnum, action, user ] entries (all strings).

    For every change the files are listed in sorted order.  Each file whose
    action is 'edit' is diffed against its previous revision
    ( revnum - 1 ) with the four extract_metrics_counts_*_filetofile
    functions, and the totals per diff method are printed.  Changes
    without any 'edit' entry only get the file listing.  The lists inside
    data_dict are not modified.

    Returns None.
    """
    write = sys.stdout.write
    for change_label in data_dict.keys():
        # Work on a sorted copy so the caller's list keeps its order.
        files = sorted( data_dict[ change_label ] )

        # The user is recorded on every entry; take it from the first one.
        if files:
            user = files[0][3]
        else:
            user = 'unknown'

        # Lets start writing our report data
        write( '\n\nChange --> ' + change_label + ' Applied to ' + repr( len( files ) ) + ' Files ' +
               '\t By User --> ' + user + '\n\n' )
        for entry in files:
            write( '\t' + entry[0] + ' - ' + entry[1] + ' - ' + entry[2] + '\n' )

        # Fresh totals for this change
        rcs_adds = rcs_deletes = 0
        ctx_adds = ctx_changes = ctx_deletes = 0
        sum_adds = sum_addchunks = sum_deletes = sum_deletechunks = 0
        sum_changes = sum_changechunks = sum_extra = 0
        uni_adds = uni_deletes = 0
        # Only print data if the change holds an edit that can provide it
        change_confirmed = 0

        for entry in files:
            # Only an edit has a previous revision that can be diffed
            if entry[2] != 'edit':
                continue
            change_confirmed = 1
            # The implication here is that in any changelist you are only
            # submitting the next rev
            new_rev = int( entry[1] )
            old_spec = entry[0] + '#' + repr( new_rev - 1 )
            new_spec = entry[0] + '#' + repr( new_rev )

            ( adds, deletes ) = extract_metrics_counts_rcs_filetofile( old_spec, new_spec )
            rcs_adds += adds
            rcs_deletes += deletes

            ( adds, changes, deletes ) = extract_metrics_counts_context_filetofile( old_spec, new_spec )
            ctx_adds += adds
            ctx_changes += changes
            ctx_deletes += deletes

            ( adds, addchunks, deletes, deletechunks, changes, changechunks, extra ) = \
                extract_metrics_counts_summary_filetofile( old_spec, new_spec )
            sum_adds += adds
            sum_addchunks += addchunks
            sum_deletes += deletes
            sum_deletechunks += deletechunks
            sum_changes += changes
            sum_changechunks += changechunks
            sum_extra += extra

            ( adds, deletes ) = extract_metrics_counts_unified_filetofile( old_spec, new_spec )
            uni_adds += adds
            uni_deletes += deletes

        write( '\n\n\n' )
        if not change_confirmed:
            continue

        # Print out the final report about this change
        write( '\tMetrics via RCS diff method Lines Added --> ' + repr( rcs_adds ) + '\n' )
        write( '\t Lines Deleted--> ' + repr( rcs_deletes ) + '\n\n' )

        write( '\tMetrics via Context diff method Lines Added --> ' + repr( ctx_adds ) + '\n' )
        write( '\t Lines Changed--> ' + repr( ctx_changes ) + '\n' )
        write( '\t Lines Deleted--> ' + repr( ctx_deletes ) + '\n\n' )

        write( '\tMetrics via Summary diff method Add --> ' + repr( sum_addchunks ) + ' Chunks\n' )
        write( '\t ' + repr( sum_adds ) + ' Lines\n' )
        write( '\t Deleted--> ' + repr( sum_deletechunks ) + ' Chunks\n' )
        # The line total belongs here, not the chunk total a second time.
        write( '\t ' + repr( sum_deletes ) + ' Lines\n' )
        write( '\t Changed--> ' + repr( sum_changechunks ) + ' Chunks\n' )
        write( '\t ' + repr( sum_changes ) + ' / ' + repr( sum_extra ) + ' Lines\n\n' )

        write( '\tMetrics via Unified diff method Lines Added --> ' + repr( uni_adds ) + '\n' )
        write( '\t Lines Deleted--> ' + repr( uni_deletes ) + '\n\n' )
# Main body of program
#
# Usage: <script> depot_location diff_label1 diff_label2
#
# The two labels are appended directly to depot_location when the p4
# commands are built, so they presumably include their own prefix
# ( e.g. '@label' ) -- confirm against local usage.
#
# Guarded so that importing this file does not start a report run.
if __name__ == '__main__':
    if len( sys.argv ) < 4:
        sys.stderr.write( 'Usage: ' + sys.argv[0] + ' depot_location diff_label1 diff_label2\n' )
        sys.exit( 1 )

    depot_location = sys.argv[1]
    diff_label1 = sys.argv[2]
    diff_label2 = sys.argv[3]

    sys.stdout.write( 'Starting processing....\n\n' )

    # fyi
    sys.stdout.write('Start point ' + diff_label1 + ' End Point ' + diff_label2 + '\n' )
    sys.stdout.write('Depot Range ' + depot_location + '\n\n' )

    # Find the changes between the two points, describe each one, then
    # report the diff metrics per change.
    changelist_numbers = extract_change_numbers( depot_location, diff_label1, diff_label2 )
    data_dict = gen_change_data( changelist_numbers )
    gen_metrics_data( data_dict )