# excelmetrics.py #1

#! /usr/bin/env python
# $Id: //guest/thomas_quinot/perforce/utils/metrics/excelmetrics.py#1 $
##
##  Written by Scott Pasnikowski around the time of 4/22/99
##  @symantec corp.   Home of the Norton utilities
##
##  Under duress of pesky QA people...
##
##  This comes with no guarantee whatsoever... on any level.
##
##  Consider this to be under the linux type license thingy
##  ( Don't recall what it's called, GNU or copyleft or whatever )
##  and if anyone complains my manager du jour said I could give it away.
##  ( he really did )
##

import sys, os, string, re

# everybody is using this
expFileNameField = re.compile( r'^(//.+)/(.+)' )


# This func corresponds to p4 diff2 with the -dn switch

def extract_metrics_counts_rcs( depot_root, label_one, label_two ):
    """Total the added and deleted line counts reported by an RCS-format diff.

    Runs ``p4 diff2 -dn "<depot_root><label_one>" "<depot_root><label_two>"``
    and sums the line counts of every RCS edit command in its output.

    depot_root limits the diff to a single "project", or to something even
    smaller such as a single file.  label_one and label_two are appended to
    it verbatim, so they must carry their own leading '@'.

    Returns a tuple (lines_added, lines_deleted).
    """
    command = 'p4 diff2 -dn "' + depot_root + label_one + '" "' + depot_root + label_two + '"'

    # RCS edit commands look like "a85 2" or "d12 3":
    #     operation, (line) location, (number of lines affected) count
    # Every other line -- file headers, or diffed text (e.g. from an rtf)
    # that merely starts with 'a' or 'd' -- fails the match and is ignored.
    # NOTE(review): added text that itself looks like an edit command would
    # still be counted; the format gives no cheap way to tell them apart here.
    edit_command = re.compile( r'^([ad])[0-9]+ (\d+)' )

    lines_added = 0
    lines_deleted = 0

    pipe = os.popen( command, 'r' )
    try:
        for line in pipe.readlines():
            m = edit_command.match( line )
            if not m:
                continue
            ( operation, count ) = m.groups()
            if operation == 'a':
                lines_added += int( count )
            else:
                lines_deleted += int( count )
    finally:
        # Always release the child process' pipe, even if parsing fails.
        pipe.close()

    return lines_added, lines_deleted


# This func corresponds to p4 diff2 with the -dc switch

def extract_metrics_counts_context( depot_root, label_one, label_two ):
    """Count added, changed and deleted lines in a context-format diff.

    Runs ``p4 diff2 -dc "<depot_root><label_one>" "<depot_root><label_two>"``
    and classifies each output line by its first character:

        '+'  counted as an added line
        '!'  counted as a changed line
        '-'  counted as a deleted line

    depot_root limits the diff to a single "project", or to something even
    smaller such as a single file.  label_one and label_two are appended to
    it verbatim, so they must carry their own leading '@'.

    Returns a tuple (lines_added, lines_changed, lines_deleted).
    """
    command = 'p4 diff2 -dc "' + depot_root + label_one + '" "' + depot_root + label_two + '"'

    # In context format the new-file half of every hunk is introduced by a
    # header such as "--- 12,15 ----".  It starts with '-' but is not a
    # deleted line (those are "- <text>"), so it must not be counted.
    hunk_header = re.compile( r'^--- \d+(,\d+)? ----' )

    lines_added = 0
    lines_changed = 0
    lines_deleted = 0

    pipe = os.popen( command, 'r' )
    try:
        for line in pipe.readlines():
            marker = line[0:1]
            if marker == '+':
                lines_added += 1
            elif marker == '-':
                if not hunk_header.match( line ):
                    lines_deleted += 1
            elif marker == '!':
                lines_changed += 1
    finally:
        # Always release the child process' pipe, even if parsing fails.
        pipe.close()

    return lines_added, lines_changed, lines_deleted


def extract_metrics_counts_summary( depot_root, label_one, label_two ):
    """Total the per-file summary figures printed by ``p4 diff2 -ds``.

    Runs ``p4 diff2 -ds "<depot_root><label_one>" "<depot_root><label_two>"``
    and accumulates the numbers found on lines of the form

        add <chunks> chunks <lines> ...
        deleted <chunks> chunks <lines> ...
        changed <chunks> chunks <n1> / <n2> ...

    For "changed" lines, <n1> is accumulated into extra_data and <n2> into
    lines_changed.

    depot_root limits the diff to a single "project", or to something even
    smaller such as a single file.  label_one and label_two are appended to
    it verbatim, so they must carry their own leading '@'.

    Returns a tuple (lines_added, add_chunks, lines_deleted, delete_chunks,
    lines_changed, change_chunks, extra_data).
    """
    command = 'p4 diff2 -ds "' + depot_root + label_one + '" "' + depot_root + label_two + '"'

    add_line = re.compile( r'^add (\d+) chunks (\d+)' )
    delete_line = re.compile( r'^deleted (\d+) chunks (\d+)' )
    change_line = re.compile( r'^changed (\d+) chunks (\d+) / (\d+)' )

    lines_added = 0
    add_chunks = 0
    lines_deleted = 0
    delete_chunks = 0
    lines_changed = 0
    change_chunks = 0
    extra_data = 0

    pipe = os.popen( command, 'r' )
    try:
        for line in pipe.readlines():
            # The three line shapes are mutually exclusive, so stop at the
            # first pattern that matches.
            m = add_line.match( line )
            if m:
                add_chunks += int( m.group( 1 ) )
                lines_added += int( m.group( 2 ) )
                continue

            m = delete_line.match( line )
            if m:
                delete_chunks += int( m.group( 1 ) )
                lines_deleted += int( m.group( 2 ) )
                continue

            m = change_line.match( line )
            if m:
                change_chunks += int( m.group( 1 ) )
                extra_data += int( m.group( 2 ) )
                lines_changed += int( m.group( 3 ) )
    finally:
        # Always release the child process' pipe, even if parsing fails.
        pipe.close()

    return lines_added, add_chunks, lines_deleted, delete_chunks, lines_changed, change_chunks, extra_data


# This func corresponds to p4 diff2 with the -du switch


def extract_metrics_counts_unified( depot_root, label_one, label_two ):
    """Count '+' and '-' lines in the output of ``p4 diff2 -du``.

    The two file specs handed to p4 are depot_root with label_one and
    label_two appended, each wrapped in double quotes.  depot_root narrows
    the diff to a single "project", or even to a single file.

    Returns a tuple (lines_added, lines_deleted): the number of output lines
    whose first character is '+' and '-' respectively.
    """
    first_spec = '"' + depot_root + label_one + '"'
    second_spec = '"' + depot_root + label_two + '"'
    command = ' '.join( [ 'p4 diff2 -du', first_spec, second_spec ] )

    # Only the leading character of each line matters for the tally.
    leading_chars = [ text[0:1] for text in os.popen( command, 'r' ).readlines() ]

    return leading_chars.count( '+' ), leading_chars.count( '-' )

# This func corresponds to p4 diff2 with no switches

def extract_metrics_counts_flat( depot_root, label_one, label_two ):
    """Echo the change commands of a plain ``p4 diff2`` to stdout.

    Runs ``p4 diff2 "<depot_root><label_one>" "<depot_root><label_two>"``
    and, for every change command in the output ("5a6", "3,4c5", "7d6,8",
    "1,2c3,4", ...), writes its numbers and operation letter to stdout
    separated by single spaces.  When the line immediately before a change
    command is a "====" file header, that header is written first (followed
    by an extra newline, since the header keeps its own line ending).

    depot_root limits the diff to a single "project", or to something even
    smaller such as a single file.  label_one and label_two are appended to
    it verbatim, so they must carry their own leading '@'.

    Returns a tuple (lines_added, lines_deleted, lines_changed).
    NOTE(review): nothing ever increments these counters, so the result is
    always (0, 0, 0); the function looks unfinished.
    """
    # Quote the file specs, like the other extract functions do, so that
    # depot paths containing spaces survive the shell.
    command = 'p4 diff2 "' + depot_root + label_one + '" "' + depot_root + label_two + '"'

    lines_added = 0
    lines_deleted = 0
    lines_changed = 0

    file_header = re.compile( r'^====' )
    # <from>[,<from_end>] <a|c|d> <to>[,<to_end>] -- the optional range ends
    # are simply absent from the echoed output when the diff omits them.
    change_command = re.compile( r'^(\d+)(?:,(\d+))?([acd])(\d+)(?:,(\d+))?' )

    # Start with an empty "previous line" so a change command on the very
    # first output line cannot hit an unassigned variable.
    prev_line = ''

    pipe = os.popen( command, 'r' )
    try:
        for line in pipe.readlines():
            m = change_command.match( line )
            if m:
                if file_header.match( prev_line ):
                    sys.stdout.write( prev_line + '\n' )
                fields = [ g for g in m.groups() if g is not None ]
                sys.stdout.write( ' '.join( fields ) + '\n' )
            prev_line = line
    finally:
        # Always release the child process' pipe, even if parsing fails.
        pipe.close()

    return lines_added, lines_deleted, lines_changed


def _write_untouched_row( depot_file, action ):
    # Write one CSV row "<depot dir>,<file name>,<action>,0,...,0" to stdout
    # for a file that needs no line-level diffing.  Nothing is written when
    # depot_file cannot be split into a directory and a file name.
    s1 = expFileNameField.search( depot_file )
    if s1:
        ( depotPath, fileName ) = s1.groups()
        sys.stdout.write( depotPath + ',' + fileName + ',' + action + ',0,0,0,0,0,0,0,0,0,0,0,0,0,0\n' )


def extract_metrics_counts_file( depot_root, label_one, label_two ):
    """Classify every file that ``p4 diff2`` reports between two labels.

    Runs ``p4 diff2 "<depot_root><label_one>" "<depot_root><label_two>"`` and
    looks only at the "==== filespec1 - filespec2 ..." header lines:

      * first filespec missing   -> counted as added; an "Added" CSV row is
                                    written to stdout
      * second filespec missing  -> counted as deleted; a "Deleted" CSV row
                                    is written to stdout
      * both present, same rev   -> an "Unchanged" CSV row is written
      * both present, revs differ-> counted as changed and its depot path is
                                    appended to the returned list (no row is
                                    written here; the caller diffs the file)

    depot_root limits the diff to a single "project" or some subset even
    smaller.  label_one and label_two are appended to it verbatim, so they
    must carry their own leading '@'.

    Returns a tuple (files_added, files_deleted, files_typechange,
    files_changed, revisions_added, file_count1, file_count2, file_list),
    where file_count1 / file_count2 are the number of files present on the
    first / second side and file_list holds the depot paths of the changed
    files.

    Raises ValueError if a header line does not split into the expected
    number of fields.
    """
    # Use quotes around the file params in case someone inputs a path that contains space chars
    command = 'p4 diff2 "' + depot_root + label_one + '" "' + depot_root + label_two + '"'

    debug = 0   # set to 1 to trace the parsing on stdout

    files_added = 0
    files_deleted = 0
    files_typechange = 0
    files_changed = 0
    revisions_added = 0
    file_count1 = 0
    file_count2 = 0
    file_list = []

    # Only process the lines that list filespec1 - filespec2
    header_line = re.compile( r'^====' )
    # If the first file is missing it shows up like this ...
    first_missing = re.compile( r'< none >' )
    # ... and if the second one is missing, like this (notice missing spaces)
    # NOTE(review): the two spellings are taken from the original script;
    # confirm them against the output of the p4 version in use.
    second_missing = re.compile( r'<none>' )
    # Separator between the two filespecs.  Plain "-" cannot be used because
    # it may occur in a file name, and the separator looks different when the
    # second filespec is missing.
    separator_no_second = re.compile( r' - <' )
    separator = re.compile( r' - //' )
    # The '#' splits the file spec from the revision number
    rev_marker = re.compile( r'#' )

    pipe = os.popen( command, 'r' )
    try:
        lines = pipe.readlines()
    finally:
        # Always release the child process' pipe.
        pipe.close()

    for line in lines:
        if not header_line.match( line ):
            continue

        if debug:
            sys.stdout.write( line )

        is_first_missing = first_missing.search( line )
        is_second_missing = second_missing.search( line )

        # Start off thinking that the first file has 1 rev; the "real" rev
        # number is broken out of the filespec below.
        rev_number1 = 1
        file1_path = ''
        file1_type = ''
        file2_type = ''

        # ---- first filespec ------------------------------------------------
        if is_first_missing:
            # First file is empty so this is an added file
            files_added += 1
            rev_number1 = 0
        else:
            if is_second_missing:
                ( section1, section2 ) = separator_no_second.split( line )
            else:
                ( section1, section2 ) = separator.split( line )

            # main count of files in first @label
            file_count1 += 1

            # now split the path from the rev number
            ( file1_path, file1_rest ) = rev_marker.split( section1 )
            if is_second_missing:
                # With no second file there is no file type to split off
                rev_number1 = int( file1_rest )
            else:
                ( file1_revnum, file1_type ) = file1_rest.split( ' ', 2 )
                rev_number1 = int( file1_revnum )

            if debug:
                sys.stdout.write( 'file1 path->' + file1_path + '\n' )
                sys.stdout.write( 'file1 rev number->' + repr( rev_number1 ) + '\n' )
                sys.stdout.write( 'file1 type->' + file1_type + '\n' )

        # ---- second filespec -----------------------------------------------
        if is_second_missing:
            # Second file is empty so this is a deleted file
            files_deleted += 1

            # The only path available is the first file's.  It still carries
            # the leading "==== " of the header line, so start at the "//".
            depot_start = file1_path.find( '//' )
            if depot_start >= 0:
                _write_untouched_row( file1_path[depot_start:], 'Deleted' )

            if debug:
                sys.stdout.write( 'Deleted!!!!\n' )
            continue

        # The second filespec exists, so the normal separator applies
        ( section1, section2 ) = separator.split( line )

        # main count of files in second @label
        file_count2 += 1

        # The separator swallowed the leading "//" of the second path
        ( file2_path, file2_rest ) = rev_marker.split( section2 )
        file2_path = '//' + file2_path

        if is_first_missing:
            # This is a file that has been added
            _write_untouched_row( file2_path, 'Added' )
            ( file2_revnum, compare_status ) = file2_rest.split( ' ', 2 )
            if debug:
                sys.stdout.write( 'Added!!!!\n' )
                sys.stdout.write( 'file2 rev number->' + file2_revnum + '\n\n' )
            revisions_added += int( file2_revnum )
            continue

        ( file2_revnum, file2_type, _, compare_status ) = file2_rest.split( ' ', 4 )
        if debug:
            sys.stdout.write( 'file2 rev number->' + file2_revnum + '\n' )
            sys.stdout.write( 'file2 type->' + file2_type + '\n' )
            sys.stdout.write( 'file compare status->' + compare_status + '\n' )

        rev_number2 = int( file2_revnum )
        revisions_added += rev_number2 - rev_number1

        if rev_number1 != rev_number2:
            file_list.append( file2_path )
            files_changed += 1
        else:
            _write_untouched_row( file2_path, 'Unchanged' )

        if file1_type != file2_type:
            files_typechange += 1

    return files_added, files_deleted, files_typechange, files_changed, revisions_added, file_count1, file_count2, file_list





#
# This script takes a depot path and two labels (like @jackalope.20 )
# OR two date-time combos (like @1999/02/02 )
#
# The output then gives you the depot path, file name, action type and line change counts
# of files changed deleted or added.  You can then feed the output to excel as a comma
# delimited database *.csv and make pretty charts (barf)
#


# Main body of program
#
#
if len( sys.argv ) < 4:
    # Fail with a hint instead of a bare IndexError traceback.
    sys.stderr.write( 'usage: ' + sys.argv[0] + ' depot_path @label_or_date1 @label_or_date2\n' )
    sys.exit( 1 )

depot_location = sys.argv[1]
diff_label1 = sys.argv[2]
diff_label2 = sys.argv[3]

#sys.stdout.write( 'Starting processing tree data....\n\n' )

# First pass: classify every file under the depot path.  Added, deleted and
# unchanged files get their CSV row written by this call; files whose
# revision differs come back in list_of_changed_files.
( added, deleted, typechange, changed, revs_added, count1, count2, list_of_changed_files ) = \
     extract_metrics_counts_file( depot_location, diff_label1, diff_label2 )

# Second pass: diff each changed file in four formats and write one CSV row
# holding all the resulting counts.
for different_file in list_of_changed_files:
   #sys.stdout.write( different_file  + '\n' )

   ( rcsadds, rcsdeletes) = \
              extract_metrics_counts_rcs( different_file, diff_label1, diff_label2 )

   ( ctxadds, ctxchanges, ctxdeletes) = \
              extract_metrics_counts_context( different_file, diff_label1, diff_label2 )

   ( sumadds, sumaddchunks, sumdeletes, sumdeletechunks, sumchanges, sumchangechunks, sumextra ) = \
              extract_metrics_counts_summary( different_file, diff_label1, diff_label2 )

   ( uniadds, unideletes) = \
              extract_metrics_counts_unified( different_file, diff_label1, diff_label2 )

   s1 = expFileNameField.search( different_file )
   if s1:
      ( depotPath, fileName ) = s1.groups()
   else:
      # Never reuse the previous file's names: fall back to an empty
      # directory column and the whole spec as the file name.
      ( depotPath, fileName ) = ( '', different_file )

   counts = [ rcsadds, rcsdeletes,
              ctxadds, ctxchanges, ctxdeletes,
              sumadds, sumaddchunks, sumdeletes, sumdeletechunks,
              sumchanges, sumchangechunks, sumextra,
              uniadds, unideletes ]

   # str() replaces the old backtick repr syntax, which no longer parses
   # under Python 3.
   row = [ depotPath, fileName, 'Changed' ] + [ str( n ) for n in counts ]
   sys.stdout.write( ','.join( row ) + '\n' )
#	Change	User	Description	Committed
#1	411	Thomas Quinot	Scott Panikowski's metrics scripts.