# changemetrics.py #1

#! /usr/bin/env python
# $Id: //guest/thomas_quinot/perforce/utils/metrics/changemetrics.py#1 $
##
##  Written by Scott Pasnikowski around the time of 4/22/99
##  @symantec corp.   Home of the Norton utilities
##
##  Under duress of pesky QA people...
##
##  This comes with no guarantee whatsoever... on any level.
##
##  Consider this to be under the linux type license thingy
##  ( Don't recall what it's called, GNU or copyleft or whatever )
##  and if anyone complains my manager du jour said I could give it away.
##  ( he really did )
##

import sys, os, string, re


# Splits a depot path that starts with '//' into ( directory part, file name ).
# The first group is greedy, so the split normally happens at the last '/'.
# NOTE: gen_change_data() compiles its own local pattern with the same
# expression and name, which shadows this one inside that function.
expFileNameField = re.compile( r'^(//.+)/(.+)' )


# This func corresponds to p4 diff2 with the -dn switch

def extract_metrics_counts_rcs_filetofile( depot_file_and_rev1, depot_file_and_rev2 ):
    """Count added and deleted lines between two file revisions (RCS diff).

    Runs 'p4 diff2 -dn' on the two file specs and totals the counts found
    on the edit-command lines of its output, which look like:

        a85 2
        ^ operation ('a' = add, 'd' = delete)
         ^^ line location
            ^ number of lines affected

    Lines that do not have this shape (for example the output produced
    when diffing a non-text file) are ignored.

    NOTE(review): a content line that happens to look like an edit command
    is counted too -- confirm whether that can occur in practice.

    Parameters:
        depot_file_and_rev1 -- first file spec, e.g. '//depot/dir/file#3'
        depot_file_and_rev2 -- second file spec, e.g. '//depot/dir/file#4'

    Returns:
        A tuple ( lines_added, lines_deleted ).
    """
    # Both specs are wrapped in double quotes for the shell.
    command = 'p4 diff2 -dn "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'
    lines_added = 0
    lines_deleted = 0

    edit_command = re.compile(r'^([ad])([0-9]+) (\d+)')

    pipe = os.popen(command, 'r')
    try:
        for line in pipe.readlines():
            found = edit_command.match(line)
            if not found:
                continue
            operation, location, count = found.groups()
            if operation == 'a':
                lines_added += int(count)
            else:
                lines_deleted += int(count)
    finally:
        # Always release the pipe, even if parsing fails part-way.
        pipe.close()

    return lines_added, lines_deleted


# This func corresponds to p4 diff2 with the -dc switch

def extract_metrics_counts_context_filetofile( depot_file_and_rev1, depot_file_and_rev2 ):
    """Count added, changed and deleted lines between two revisions (context diff).

    Runs 'p4 diff2 -dc' on the two file specs and classifies every output
    line by its first character:

        '+'  counted as an added line
        '!'  counted as a changed line
        '-'  counted as a deleted line

    Context diffs separate the old and new halves of a hunk with a range
    line such as '--- 12,15 ----'.  That line starts with '-' but is not a
    deleted line, so it is excluded from the deleted count.

    NOTE(review): a changed line is marked with '!' in both halves of a
    hunk, so lines_changed presumably counts old and new text together --
    confirm this is the intended metric.

    Parameters:
        depot_file_and_rev1 -- first file spec, e.g. '//depot/dir/file#3'
        depot_file_and_rev2 -- second file spec, e.g. '//depot/dir/file#4'

    Returns:
        A tuple ( lines_added, lines_changed, lines_deleted ).
    """
    command = 'p4 diff2 -dc "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'
    lines_added = 0
    lines_changed = 0
    lines_deleted = 0

    # Real deleted lines are written as '- text', so they never start with '--- '.
    hunk_range = re.compile(r'^--- \d+(,\d+)? ----')

    pipe = os.popen(command, 'r')
    try:
        for line in pipe.readlines():
            marker = line[0:1]
            if marker == '+':
                lines_added += 1
            elif marker == '-':
                if not hunk_range.match(line):
                    lines_deleted += 1
            elif marker == '!':
                lines_changed += 1
    finally:
        pipe.close()

    return lines_added, lines_changed, lines_deleted


def extract_metrics_counts_summary_filetofile( depot_file_and_rev1, depot_file_and_rev2 ):
    """Total the chunk and line counts reported by a summary diff.

    Runs 'p4 diff2 -ds' on the two file specs and adds up the numbers from
    output lines that start with one of:

        add <chunks> chunks <lines>
        deleted <chunks> chunks <lines>
        changed <chunks> chunks <first> / <second>

    For 'changed' lines the first number is accumulated into extra_data and
    the second into lines_changed.

    Parameters:
        depot_file_and_rev1 -- first file spec, e.g. '//depot/dir/file#3'
        depot_file_and_rev2 -- second file spec, e.g. '//depot/dir/file#4'

    Returns:
        A tuple ( lines_added, add_chunks, lines_deleted, delete_chunks,
                  lines_changed, change_chunks, extra_data ).
    """
    command = 'p4 diff2 -ds "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'
    lines_added = 0
    add_chunks = 0
    lines_deleted = 0
    delete_chunks = 0
    lines_changed = 0
    change_chunks = 0
    extra_data = 0

    # One pattern per summary line type; each is anchored to the line start.
    add_line = re.compile(r'^(add) (\d+) (chunks) (\d+)')
    deleted_line = re.compile(r'^(deleted) (\d+) (chunks) (\d+)')
    changed_line = re.compile(r'^(changed) (\d+) (chunks) (\d+) / (\d+)')

    pipe = os.popen(command, 'r')
    try:
        for line in pipe.readlines():
            found = add_line.match(line)
            if found:
                add_chunks += int(found.group(2))
                lines_added += int(found.group(4))
                continue

            found = deleted_line.match(line)
            if found:
                delete_chunks += int(found.group(2))
                lines_deleted += int(found.group(4))
                continue

            found = changed_line.match(line)
            if found:
                change_chunks += int(found.group(2))
                extra_data += int(found.group(4))
                lines_changed += int(found.group(5))
    finally:
        pipe.close()

    return lines_added, add_chunks, lines_deleted, delete_chunks, lines_changed, change_chunks, extra_data


# This func corresponds to p4 diff2 with the -du switch


def extract_metrics_counts_unified_filetofile( depot_file_and_rev1, depot_file_and_rev2 ):
    """Count added and deleted lines between two file revisions (unified diff).

    Runs 'p4 diff2 -du' on the two file specs and classifies every output
    line by its first character: '+' counts as an added line, '-' as a
    deleted line.  All other lines are ignored.

    NOTE(review): if the p4 output ever contains '---' / '+++' file header
    lines they would be counted as well -- confirm against real output.

    Parameters:
        depot_file_and_rev1 -- first file spec, e.g. '//depot/dir/file#3'
        depot_file_and_rev2 -- second file spec, e.g. '//depot/dir/file#4'

    Returns:
        A tuple ( lines_added, lines_deleted ).
    """
    command = 'p4 diff2 -du "' + depot_file_and_rev1 + '" "' + depot_file_and_rev2 + '"'
    lines_added = 0
    lines_deleted = 0

    pipe = os.popen(command, 'r')
    try:
        for line in pipe.readlines():
            marker = line[0:1]
            if marker == '+':
                lines_added += 1
            elif marker == '-':
                lines_deleted += 1
    finally:
        pipe.close()

    return lines_added, lines_deleted


# This func generates a list of change numbers that have been
# added between the 2 points.
# It's equivalent to p4 changes //depot/some

def extract_change_numbers( depot_root, label_one, label_two ):
    """List the changes that appear at only one of two points in the depot.

    Runs 'p4 changes <depot_root><label_one>' and
    'p4 changes <depot_root><label_two>', collects the change numbers from
    both listings, and keeps those that show up in exactly one of them.
    The result is also printed to stdout as a short summary.

    Parameters:
        depot_root -- depot path limiting the listing to one "project"
                      (or something smaller, like a single file)
        label_one  -- revision specifier for the first point; it is appended
                      to depot_root as-is, so it must carry its own prefix
        label_two  -- revision specifier for the second point, same rules

    Returns:
        A list of change numbers (as strings) in ascending numeric order.
    """
    first_list = _list_change_numbers('p4 changes ' + depot_root + label_one)
    second_list = _list_change_numbers('p4 changes ' + depot_root + label_two)

    # A change present in both listings existed at both points, so it is
    # dropped.  Sorting by integer value keeps '100' after '11'.
    only_in_one = set(first_list).symmetric_difference(second_list)
    difflist = sorted(only_in_one, key=int)

    # Lets print out some summary data
    sys.stdout.write('\n' + str(len(difflist)) + ' Changelists submitted:\n')
    for changeNumber in difflist:
        sys.stdout.write(changeNumber + ' ')
    sys.stdout.write('\n\n')

    return difflist


def _list_change_numbers( command ):
    """Run *command* and return the numbers from its 'Change <n> ...' lines."""
    expChangeNumber = re.compile(r'^Change (\d+)')
    numbers = []

    pipe = os.popen(command, 'r')
    try:
        for line in pipe.readlines():
            found = expChangeNumber.match(line)
            # Skip anything that is not a change header (blank lines, etc.)
            # instead of failing on it.
            if found:
                numbers.append(found.group(1))
    finally:
        pipe.close()

    return numbers

def gen_change_data( listof_changenums ):
    """Collect the files touched by each change via 'p4 describe -s'.

    For every change number a 'p4 describe -s <num>' is run.  The header
    line ('Change <n> by <user> on <date> <time>') supplies the change
    number and user; every '... <depot path>#<rev> <action>' line supplies
    one file entry.

    A describe whose output contains no header line is skipped with a
    warning on stderr, since its files could not be attributed to a change.

    Parameters:
        listof_changenums -- iterable of change numbers as strings

    Returns:
        A dictionary mapping repr( change number string ) -- i.e. the number
        wrapped in quotes, exactly as the original backtick expression
        produced -- to a list of [ depotPath, revnum, action, user ] lists.
    """
    command = 'p4 describe -s '

    change_dict = {}

    expChangeFields = re.compile(r'^Change (\d+) by (.+) on (\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)')
    # The leading '...' is literal, so the dots are escaped; unescaped they
    # would match any three characters and let description text through.
    expFileFields = re.compile(r'^\.\.\. (.+)#(\d+) (.+)')
    expFileNameField = re.compile(r'^(//.+)/(.+)')

    # for every change number do a p4 describe
    for num in listof_changenums:
        # Reset per change so nothing leaks over from the previous one.
        changeNumber = None
        user = None
        list_of_files = []

        pipe = os.popen(command + num, 'r')
        try:
            for line in pipe.readlines():
                sdata = expChangeFields.search(line)
                if sdata:
                    # Aha! this is the header line with the user name
                    changeNumber, user = sdata.group(1, 2)
                    continue

                sfile = expFileFields.search(line)
                if sfile:
                    # A file that belongs to this change
                    depotPath, revnum, action = sfile.groups()
                    # Only keep entries that look like '//depot/.../file'
                    if expFileNameField.search(depotPath):
                        list_of_files.append([depotPath, revnum, action, user])
        finally:
            pipe.close()

        if changeNumber is None:
            sys.stderr.write('Warning: no change header found for change ' + num + ', skipped\n')
            continue

        # Append to an existing entry if the same change shows up twice.
        key = repr(changeNumber)
        change_dict[key] = change_dict.get(key, []) + list_of_files

    return change_dict


def gen_metrics_data( data_dict ):
    """Print a per-change metrics report to stdout.

    For every change in data_dict the files are listed, and for each file
    whose action is 'edit' the revision is diffed against the one before
    it using the four extract_metrics_counts_*_filetofile() functions.
    The totals are printed only for changes with at least one 'edit'.

    Parameters:
        data_dict -- dictionary mapping a change label (printed as-is) to a
                     list of [ depotPath, revnum, action, user ] entries.
                     The lists are not modified.

    Returns:
        None.
    """
    write = sys.stdout.write

    for change_label in data_dict.keys():
        # Work on a sorted copy so the caller's list is left untouched.
        file_entries = sorted(data_dict[change_label])

        # Every entry of a change carries the same user; use the first one.
        if file_entries:
            user = file_entries[0][3]
        else:
            user = '(unknown)'

        write('\n\nChange --> ' + change_label + '  Applied to ' + str(len(file_entries)) + ' Files ' +
              '\t By User --> ' + user + '\n\n')

        # Start every change with clean totals
        rcs_adds = rcs_deletes = 0
        ctx_adds = ctx_changes = ctx_deletes = 0
        sum_adds = sum_addchunks = sum_deletes = sum_deletechunks = 0
        sum_changes = sum_changechunks = sum_extra = 0
        uni_adds = uni_deletes = 0
        # Must be reset here: a change without any 'edit' never sets it.
        change_confirmed = 0

        for entry in file_entries:
            write('\t' + entry[0] + ' - ' + entry[1] + ' - ' + entry[2] + '\n')

        for entry in file_entries:
            # Only an 'edit' has a previous revision that can be diffed
            if entry[2] != 'edit':
                continue
            change_confirmed = 1

            # The implication here is that a changelist only ever submits
            # the next revision, so the predecessor is revnum - 1.
            new_rev = int(entry[1])
            old_spec = entry[0] + '#' + str(new_rev - 1)
            new_spec = entry[0] + '#' + str(new_rev)

            (adds, deletes) = extract_metrics_counts_rcs_filetofile(old_spec, new_spec)
            rcs_adds += adds
            rcs_deletes += deletes

            (adds, changes, deletes) = extract_metrics_counts_context_filetofile(old_spec, new_spec)
            ctx_adds += adds
            ctx_changes += changes
            ctx_deletes += deletes

            (adds, addchunks, deletes, deletechunks, changes, changechunks, extra) = \
                extract_metrics_counts_summary_filetofile(old_spec, new_spec)
            sum_adds += adds
            sum_addchunks += addchunks
            sum_deletes += deletes
            sum_deletechunks += deletechunks
            sum_changes += changes
            sum_changechunks += changechunks
            sum_extra += extra

            (adds, deletes) = extract_metrics_counts_unified_filetofile(old_spec, new_spec)
            uni_adds += adds
            uni_deletes += deletes

        write('\n\n\n')
        if not change_confirmed:
            continue

        # Print out the final report about this change
        write('\tMetrics via RCS diff method      Lines Added  --> ' + str(rcs_adds) + '\n')
        write('\t                                 Lines Deleted--> ' + str(rcs_deletes) + '\n\n')

        write('\tMetrics via Context diff method  Lines Added  --> ' + str(ctx_adds) + '\n')
        write('\t                                 Lines Changed--> ' + str(ctx_changes) + '\n')
        write('\t                                 Lines Deleted--> ' + str(ctx_deletes) + '\n\n')

        write('\tMetrics via Summary diff method  Add    --> ' + str(sum_addchunks) + ' Chunks\n')
        write('\t                                            ' + str(sum_adds) + ' Lines\n')
        write('\t                                 Deleted--> ' + str(sum_deletechunks) + ' Chunks\n')
        # Deleted *lines* go here (the chunk count was printed twice before).
        write('\t                                            ' + str(sum_deletes) + ' Lines\n')
        write('\t                                 Changed--> ' + str(sum_changechunks) + ' Chunks\n')
        write('\t                                            ' + str(sum_changes) + ' / ' + str(sum_extra) + ' Lines\n\n')

        write('\tMetrics via Unified diff method  Lines Added  --> ' + str(uni_adds) + '\n')
        write('\t                                 Lines Deleted--> ' + str(uni_deletes) + '\n\n')




# Main body of program
#
# Usage: changemetrics.py depot_location diff_label1 diff_label2
#
# Guarded so that importing this file does not start running p4 commands.
if __name__ == '__main__':
    if len( sys.argv ) < 4:
        sys.stderr.write( 'Usage: ' + sys.argv[0] + ' depot_location diff_label1 diff_label2\n' )
        sys.exit( 2 )

    depot_location = sys.argv[1]
    diff_label1 = sys.argv[2]
    diff_label2 = sys.argv[3]

    sys.stdout.write( 'Starting processing....\n\n' )

    # fyi
    sys.stdout.write('Start point  ' + diff_label1 + '  End Point  ' + diff_label2 + '\n' )
    sys.stdout.write('Depot Range ' + depot_location + '\n\n' )

    # 1. find the changes that differ between the two points
    # 2. describe each of them to get its files
    # 3. diff every edited file and print the report
    changelist_numbers = extract_change_numbers( depot_location, diff_label1, diff_label2 )
    data_dict = gen_change_data( changelist_numbers )
    gen_metrics_data( data_dict )
#	Change	User	Description	Committed
#1	411	Thomas Quinot	Scott Panikowski's metrics scripts.