genmetadata #64

eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
  & eval 'exec perl -S $0 $argv:q'
  if 0;
#  THE PRECEDING STUFF EXECS perl via $PATH
# -*-Fundamental-*-

#  $Id: //guest/richard_geiger/utils/cvs2p4/bin/genmetadata#64 $
#
#  Richard Geiger
#

require 5.000;
require "timelocal.pl";

#use bytes;


#  dirname(PATH) - return the directory portion of PATH, like dirname(1).
#
#  An empty PATH is treated as "."; a single component directly below the
#  root yields "/"; a name with no directory component yields ".".
#
#  NOTE: a second definition of dirname appears further down in this file.
#  Perl keeps the later one, so this copy is only kept in step with it
#  (lexical "my" instead of "local" on the package global $dir).
#
sub dirname
{
  my ($dir) = @_;

  # Empty means "here"; the appended "/" gives the patterns a terminator.
  $dir =~ s%^$%.%; $dir = "$dir/";

  # "/name/" (or just "//"): the parent is the root.
  if ($dir =~ m%^/[^/]*//*$%) { return "/"; }

  # Anything with at least two components: drop the last one.
  if ($dir =~ m%^.*[^/]//*[^/][^/]*//*$%)
  { $dir =~ s%^(.*[^/])//*[^/][^/]*//*$%$1%; { return $dir; } }

  return ".";
}

use Carp; # ...or flounder. (This will fail unless 'perl' is a perl5!)
#  Enable autoflush on the currently selected output handle.
$| = 1;

#  $Myname: this script's name with any leading directories removed; used
#  as the prefix of diagnostic messages.
($Myname = $0) =~ s%^.*/%%;

#  $Mydir: absolute path of the PARENT of the directory holding this script
#  (lib/util.pl and bin/rlog are later located relative to it).  It is found
#  by chdir'ing there and asking /bin/pwd, then returning to the starting
#  directory, which is remembered in $Here.
#
#  "or" (not "||") is required on the chdir calls: "chdir X || die" parses
#  as chdir(X || die), so a failed chdir used to go unnoticed.
$Mydir = &dirname($0);
$Here = `/bin/pwd`; chomp $Here;
if ($Mydir ne ".") { chdir($Mydir) or die "$Myname: can't chdir \"$Mydir\": $!"; }
chdir("..") or die "$Myname: can't chdir \"..\": $!";
$Mydir = `/bin/pwd`; chomp $Mydir;
chdir($Here) or die "$Myname: can't chdir \"$Here\": $!";

require "$Mydir/lib/util.pl";

#  Usage summary printed by usage() and help().  The directory argument is
#  mandatory: the option parser further down calls usage() unless exactly
#  one non-option argument is given, and that argument becomes $Convdir.
$Usage = <<LIT;
$Myname: usage: $Myname [-prescan] convdir
LIT


#  Print the usage summary on STDERR and terminate with exit status 1.
#
sub usage
{
  print STDERR "$Usage";
  exit(1);
}


#  Print the usage summary followed by a short notice on STDERR, then
#  terminate with exit status 1.
#
sub help
{
  print STDERR "$Usage\n$Myname is not done yet. Be patient.\n";
  exit(1);
}



######
#
#  Perlstuff for parsing RCS repository files
#

#  Some globals used by these routines...
#
#  NOTE(review): in the code visible in this file only $Rcs_File is assigned
#  again (by set_RCS_revs, to the basename of the ,v file being read).
#  $Rcs_Inquote and $Rcs_Eofatal are presumably consumed by the rcstok()
#  tokenizer, which is not defined here - confirm before removing them.
$Rcs_Inquote = 0;       # remembers when we're in a '@' quoted string
$Rcs_Eofatal = 1;       # die if we hit the end of the file
$Rcs_File    = "?";     # caller should set this for the error message


#  Message prefix: "<script name>: " once $Myname is defined, otherwise "".
#
sub lead
{
  return defined($Myname) ? "$Myname: " : "";
}


#  Return the directory part of a path name, in the manner of dirname(1):
#  "" gives ".", a single component below the root gives "/", and a bare
#  file name gives ".".
#
sub dirname
{
  my $path = shift;

  # Treat the empty string as "."; the appended "/" gives both patterns
  # below a terminator to anchor on.
  $path =~ s%^$%.%;
  $path .= "/";

  # Exactly one component under the root.
  return "/" if $path =~ m%^/[^/]*//*$%;

  # Two or more components: strip the last one and the slashes around it.
  return $path if $path =~ s%^(.*[^/])//*[^/][^/]*//*$%$1%;

  return ".";
}


#  Consume tokens from rcstok() up to and including the first one that is
#  string-equal to the requested token.
#
#  NOTE(review): rcstok() is not defined in this part of the file; the loop
#  only ends when it returns the wanted token (or dies) - confirm its
#  end-of-input behaviour.
#
sub skip_to_rcstok
{
  my ($wanted) = @_;

  until (&rcstok() eq $wanted) { }
}


#  sdump(STRING) - render STRING as a hex dump for debugging output.
#
#  Returns a string holding, for each character of STRING in order, a space
#  followed by the character's ordinal as at least two lowercase hex digits
#  (so "AB" gives " 41 42").  An empty or missing STRING gives "".
#
sub sdump
{
  # Take the argument from @_.  This used to read the regex capture
  # variable $1, so the result depended on the caller's last successful
  # pattern match instead of on the string passed in.
  my ($s) = @_;
  $s = "" unless defined($s);

  my $ret = "";
  foreach my $c (split(//, $s))
    { $ret .= sprintf(" %02x", ord($c)); }
  return $ret;
}


#  setrevs(REV, NEXT, BRANCHES, DATE, AUTHOR, STATE)
#
#  Record one delta of the current RCS file in the per-file tables:
#
#    %RCS_Revs{REV}     "NEXT:BRANCHES"
#    %RCS_Dates{REV}    DATE, with "19" prepended when its first
#                       dot-separated field is shorter than four characters
#    %RCS_Authors{REV}  AUTHOR
#    %RCS_States{REV}   STATE
#    %RCS_Prevs / %RCS_Nexts   links between REV and NEXT (see below), and
#                       REV as the predecessor of each revision listed in
#                       the space-separated BRANCHES.
#
sub setrevs
{
  my ($rev, $next, $branches, $date, $author, $state) = @_;

  $RCS_Revs{$rev} = "$next:$branches";

  my $year = (split(/\./, $date))[0];
  if (length($year) < 4) { $date = "19$date"; }

  $RCS_Dates{$rev}   = "$date";
  $RCS_Authors{$rev} = "$author";
  $RCS_States{$rev}  = "$state";

# We DON'T want to do this...
# (use the name for the vendor branch from the cvs import!)
#
#  if ($d_rev =~ /^1\.1\.1(\.[0-9]+)?$/)
#    {
#      # We have a "vendor" branch - spoof a branch tag for it.
#      #
#      my $import_branch = "import";
#      if ($PureRCS) { $import_branch = "1.1.1"; }
#
#      $Brtags{$import_branch} = 1;              
#      $RCS_Branchtags{$import_branch} = "1.1.0.1";
#      $RCS_Tags{$import_branch} = "1.1.0.1";
#    }

  if ($rev !~ /^\d+\.\d+$/)
    {
      # Not a two-field revision: NEXT is recorded as REV's successor.
      $RCS_Prevs{$next} = $rev if $next;
      $RCS_Nexts{$rev} = $next;
    }
  else
    {
      # Two-field (trunk) revision: NEXT is recorded as REV's predecessor.
      $RCS_Prevs{$rev} = $next;
      $RCS_Nexts{$next} = $rev if $next;
    }

  # Every branch listed on this delta starts from it.
  for my $first (split(/ /, $branches))
    { $RCS_Prevs{$first} = $rev; }
}


#  potential_branches(TAG) - list the branch names a revision tag could
#  belong to in the current file.
#
#  Reads %RCS_Tags (tag -> revision) and %RCS_rev_brtags (CVS branch number
#  of the form "x.y.0.n" -> branch tag).  The returned list holds, in order:
#
#    - the branch the tagged revision is on: "main" when the revision minus
#      its last field is a single number, otherwise the tag recorded in
#      %RCS_rev_brtags for that branch (nothing if none is recorded);
#    - the tags of all branches rooted at the tagged revision.
#
sub potential_branches
{
  my ($tag) = @_;
  my @res;

  # Look for a branch this revision is present in...
  #
  my $sel_branch = $RCS_Tags{$tag};
  ($sel_branch) = ($sel_branch =~ /^(.*)\.\d+$/);

  if ($sel_branch =~ /^\d+$/)
    { push(@res, "main"); }
  else
    {
      # Turn the RCS branch number a.b.n into CVS's a.b.0.n spelling, which
      # is how %RCS_rev_brtags is keyed.
      my ($sel_brbase, $sel_brnum) = ($sel_branch =~ /^(.*)\.(\d+)$/);
      my $sel_brrev = "$sel_brbase.0.$sel_brnum";
      if ($RCS_rev_brtags{$sel_brrev}) { push(@res, $RCS_rev_brtags{$sel_brrev}); }
    }

  my $try_limit = 10;
  my $misses = 0;       # consecutive branch numbers found unused

  # Look for branches rooted at this revision:
  #
  for (my $i = 1; $misses <= $try_limit; $i++)
    {
      my $try_rev = "$RCS_Tags{$tag}.0.$i";
      if ($RCS_rev_brtags{$try_rev}) { push(@res, $RCS_rev_brtags{$try_rev}); }

      # If we see that the branch number exists, we should keep looking.
      # We give up only after more than $try_limit unused branch numbers
      # in a row.  Since CVS is supposed to dole them out sequentially,
      # this -should- be sufficient to decide that there are no more, and
      # is cheaper than checking every key in %RCS_rev_brtags.
      #
      # (The test must be on $try_rev.  It used to name a variable that was
      # never assigned, so the counter never reset and only branch numbers
      # 1..11 were ever examined.)
      #
      if (defined($RCS_rev_brtags{$try_rev})) { $misses = 0; next; }
      $misses++;
    }

  return @res;
}


#  set_mapping(TAG, BRANCH) - remember, across files, which branch TAG maps to.
#
#  Nothing useful is returned: after the call %Tags{TAG} holds either
#  BRANCH or an "UNMAPPED-COLLISION ..." marker, and %Tagfiles{TAG} holds
#  the file ($RCS_File) that produced this entry.  There is nothing more
#  the caller could do for the tag on this file.
#
sub set_mapping
{
  my ($tag, $sel_br) = @_;

  # A collision is an earlier, non-empty mapping that names a different
  # branch than the one determined from this file.
  #
  my $collision = $Tags{$tag} && ($Tags{$tag} ne $sel_br);

  if ($collision)
    {
      # Stay quiet only when "main" was seen before and this file says
      # the import branch; everything else is reported.
      # Probably want to log and proceed... TBD
      #
      if ($Tags{$tag} ne "main" || $sel_br ne $RCS_import_branch)
        {
          print "\nERROR: file: $RCS_File: conflicting branch determination for tag \"$tag\": ".
                "  <$Tags{$tag}> vs <$sel_br>; ".
                "  (previous file:\"$Tagfiles{$tag}\") RCS_Tags{$tag} = <$RCS_Tags{$tag}>.\n";
        }

      # Reported or not, the tag is marked as unmappable from here on.
      $sel_br = "UNMAPPED-COLLISION [$Tags{$tag}] [$sel_br]";
    }

  $Tags{$tag} = $sel_br;
  $Tagfiles{$tag} = "$RCS_File";
}
          

#  exclude(REPFILE, TAG, IS_BRANCH) - is TAG on the exclude list for REPFILE?
#
#  The "module" of REPFILE is its first path component below $CVS_MODULE
#  ("" when the file sits directly in $CVS_MODULE).  TAG is excluded when it
#  has a defined entry under either the wildcard key "*" or that module in
#  %Exclude_branches (IS_BRANCH true) or %Exclude_tags (IS_BRANCH false).
#
#  Returns a true value when excluded, a false one otherwise.
#
sub exclude
{
  my ($repfile, $tag, $is_branch) = @_;

  # $CVS_MODULE is a directory name, not a pattern: quote it so that
  # characters such as "." or "+" in the path are matched literally.
  my $mod = $repfile;
  $mod =~ s/^\Q$CVS_MODULE\E\/?//;

  if ($mod =~ /^([^\/]+)\//)
    { $mod = $1; }
  else
    { $mod = ""; }

  my $table = $is_branch ? \%Exclude_branches : \%Exclude_tags;

  return (defined($table->{"*"}{$tag}) || defined($table->{$mod}{$tag}));
}


#  set_RCS_revs(PATH, DO_TEXTS) - initialize RCS_Tags, RCS_Revs, (etc.) from
#  an RCS ,v file.
#
#  Runs "$Mydir/bin/rlog PATH" and parses its output.  The per-file globals
#  $RCS_File, $RCS_Valid, $RCS_expand, $RCS_exec, $RCS_Branch,
#  $RCS_import_is_main, $RCS_import_branch, %RCS_Tags, %RCS_Branchtags,
#  %RCS_rev_brtags, %RCS_Revs, %RCS_States, %RCS_Authors, %RCS_Dates,
#  %RCS_Nexts and %RCS_Prevs are cleared first and then filled in (the
#  revision tables via setrevs); %RCS_Logs receives each revision's log
#  message but is not cleared.
#
#  Side effects beyond the per-file tables: one line per non-excluded
#  revision tag is written to the LABELS handle, and the cross-file tables
#  %Tags, %Tagfiles (through set_mapping) and %Brtags are updated.
#
#  DO_TEXTS is accepted but not used here.
#
#  Returns 1.  With $Prescan set it returns right after the tag mapping,
#  before the .adamci handling and without setting $RCS_Valid.  Dies if the
#  rlog pipe cannot be opened.
#
sub set_RCS_revs
{
  my ($path, $do_texts) = @_;
  my $repdir;
  my $repfile;
  my $rev;      # NOTE(review): never assigned; only read in the $PureRCS code below
  my $tok;      # NOTE(review): never assigned; only read in the $PureRCS code below
  my $File;

  undef $RCS_File;
  undef $RCS_Valid;
  undef $RCS_expand;
  undef $RCS_exec;
  undef %RCS_Tags;		# both plain and branch (and special "head")
  undef %RCS_Branchtags;        # branch, only
  undef %RCS_rev_brtags;        # inverse of the above - keyed by branch tag value
  undef %RCS_Revs;
  undef %RCS_States;
  undef %RCS_Authors;
  undef %RCS_Dates;
  undef %RCS_Nexts;
  undef %RCS_Prevs;
  undef $RCS_Branch;
  undef $RCS_import_is_main;
  undef $RCS_import_branch;

  undef $rcsline_buf;

  # The ".adamci" flag is a property of the directory of THIS file, so it
  # has to start out false on every call.  (A misspelled "my" declaration
  # used to leave the global set for every file after the first hit.)
  #
  $adamci = 0;

  ($Rcs_File = $path) =~ s%^.*/%%;
  ($File = $Rcs_File) =~ s/,v$//;

  $repdir = &dirname($path);

  if (-r "$repdir/.adamci,v") { $adamci = 1; }

  $repfile = $path;

  $RCS_File = $repfile;

  if (-x $repfile) { $RCS_exec = "x"; }

  # What the subshell needs for "'" escaping in an "'"-quoted string:
  #
  $path =~ s/'/'\\''/g;
  my $rlogcmd = "$Mydir/bin/rlog '$path'";

  if (! open(RLOG, "$rlogcmd | ")) { die "\n\nopen [$rlogcmd]"; }

  # Parser state.  These are "local" rather than "my" on purpose: put_rev
  # below is an ordinary named sub and reaches them through dynamic scope.
  #
  local $mode = "head";
  local $rev_num;
  local $rev_msg;
  local $rev_author;
  local $rev_state;
  local $rev_date;
  local $rev_next;
  local $rev_branches;
  local @tmprevs;

  # This function achieves 100% global abuse!
  # (Basically just to save inlining the code)...
  #
  # It files the revision currently held in the $rev_* variables and puts
  # the parser back into "rev" mode.
  #
  sub put_rev
  {
    # First, trim the trailing "---..." lines from the log message...:

    while ($rev_msg =~ /----------------------------\n$/s)
      { $rev_msg =~ s/----------------------------\n$//s; }
  
    # This little stackiness adjusts the order in which
    # revisions are seen to be that in which they occur in
    # the file; rlog inverts them.
    #
    # A revision with exactly one dot, or one ending in ".1", is recorded
    # at once and then flushes everything stacked so far; any other
    # revision is stacked until that happens.
    #
    if (($rev_num =~ tr/\./\./) == 1 || $rev_num =~ /\.1$/)
      {
        &setrevs($rev_num, $rev_next, $rev_branches, $rev_date, $rev_author, $rev_state);
        my $revstr;
        while ($revstr = pop(@revstack))
          { &setrevs(split(/\001/, $revstr)); }
      }
    else
      { push(@revstack, "$rev_num\001$rev_next\001$rev_branches\001$rev_date\001$rev_author\001$rev_state"); }
  
    $RCS_Logs{$rev_num} = $rev_msg;
    $mode = "rev";
  }

  my $have_rev = 0;

  # The rlog output is read with a small state machine; $mode goes
  #   head -> symbols -> mid -> rev <-> revmsg0/revmsg1
  #
  while (<RLOG>)
    {
      if ($mode eq "head")
        {
          if (/^RCS file: (.*)$/) { $RCS_File = $1; }
          if (/^head: (.*)$/)     { $RCS_Tags{"head"} = $1; }
          if (/^branch: (.*)$/)
            {
              $RCS_Branch = $1;
              if ($RCS_Branch eq "1.1.1") { $RCS_import_is_main = 1; }
            }
          if (/^symbolic names:/) { $mode = "symbols"; }
        }
      elsif ($mode eq "symbols")
        {
          # This now has to be a two-pass operation, since we need to know
          # the vendor branch name before doing the revisions properly...
          # so the symbols are only collected in @tmprevs here.

          if (/^\t([^ :]+): (.*)$/)
            {
              my ($tag, $rev) = ($1, $2);
              push (@tmprevs, "$1$S$2");
              if ($rev eq "1.1.1") { $RCS_import_branch = $tag; }
            }
          elsif (/^keyword substitution: (.*)/)
            { 
              $RCS_expand = $1;
              if ($RCS_expand eq "kv") { $RCS_expand = ""; }
              $mode = "mid";
            }
        }         
      elsif ($mode eq "mid")
        {
          # Now we process the revision information more fully...
          # (@tmprevs is emptied on the first line seen in this mode.)
          #
          while ($#tmprevs >= 0)
            {
              my ($tag, $rev) = split(/$S/, shift(@tmprevs));

              if (($cnt = $rev =~ tr/\./\./) % 2 == 0)
                {
                  # Handle "RCS" branch tags: an even number of dots means
                  # a plain branch number, which is rewritten to the CVS
                  # form by inserting a 0 before the last field.
                  #
                  my @nums = split(/\./, $rev);
    
                  splice @nums, $#nums, 0, (0);
                  $rev = join(".", @nums);
                }
        
              if ((! $PureRCS) && $rev =~ /\.0\.[0-9]+$/)
                {
                  if (! $adamci)
                    {
                      if (defined($BRANCH_FLASH)) { $tag =~ s/$BRANCH_FLASH$//; }
                      $RCS_Tags{$tag} = $rev;
                      $RCS_Branchtags{$tag} = $rev;
                      $Brtags{$tag} = 1;              
                      if ($RCS_rev_brtags{$rev})
                        { print "WARNING: file: $RCS_File: dup CVS branch tags on rev <$rev> (tag <$tag>)(ignored)\n"; }
                      else
                        { $RCS_rev_brtags{$rev} = $tag; }
                    }
                }
              elsif (($cnt = $rev =~ tr/\./\./) % 2 == 1)
                {
                  $RCS_Tags{$tag} = $rev;
                  my $rcspath;
                  ($rcspath = $repfile) =~ s/,v$//;
        
                  if (! &exclude($repfile, $tag))
                    {
                      # Is this one of those wacky main/import shared revs...?
                      #
                      # NOTE(review): %RCS_Revs and %RCS_Dates were cleared
                      # above and are only filled by put_rev, which has not
                      # run yet at this point, so the "no 1.2" test is
                      # always true here - confirm that is intended.

                      my $import_as_main = "";

                      if ($rev =~ /^1\.1\.1\.(\d+)$/ &&
                        ((! defined($RCS_Revs{"1.2"})) || ($RCS_Dates{$rev} < $RCS_Dates{"1.2"})))
                          { $import_as_main = $RCS_import_branch; }

                      print LABELS "$tag$S$rcspath$S$rev$S$import_as_main\n";

                      # don't want to wipe out previously observed ones, as it might
                      # already have found the "real" mapping!
                      #
                      if ($import_as_main)
                        { $Tags{$tag} = "main"; }
                      if (! defined($Tags{$tag})) { $Tags{$tag} = ""; } 
                    }
                }
              # end of deferred rev processing
            }

          if (/^description:/) { $mode = "rev"; }
        }
      elsif (($mode eq "rev" || $mode =~ /revmsg/) && /^revision\s+([^\s]+)\s*next\s*([^\s]*)$/)
        {
          # Start of a new revision: file the previous one first.
          my ($t1, $t2) = ($1, $2);
          if ($have_rev) { &put_rev(); }
          $rev_num = $t1; $rev_next = $t2; 
          $rev_branches = "";
          $have_rev = 1;
          $mode = "rev";
        }
      elsif ($mode eq "rev")
        {
          if (/^date: ([^;]+);\s+author: ([^;]+);\s+state: ([^;]+);/)
            {
              ($rev_date, $rev_author, $rev_state) = ($1, $2, $3);

              # Dates are kept as dot-joined fields; a year below 2000 is
              # reduced by 1900 here (setrevs puts a "19" back in front).
              my @d = split(/[\/ :]/, $rev_date);
              if ($d[0] < 2000) { $d[0] -= 1900; }

              $rev_date = join(".", @d);

              # NOTE(review): these lower-case hashes are not the
              # %RCS_Dates/%RCS_Authors/%RCS_States tables filled by
              # setrevs, and nothing visible in this file reads them.
              $RCS_dates{$rev_num} = $rev_date;
              $RCS_authors{$rev_num} = $rev_author;
              $RCS_states{$rev_num} = $rev_state;
              $rev_msg = ""; $mode = "revmsg0";
            }
        }
      elsif ($mode =~ "^revmsg")
        {
          if ($_ eq "=============================================================================\n")
            { if ($have_rev) { &put_rev(); } }
          elsif ($mode eq "revmsg0" && /^branches:\s+(.*)/)
            {
              # Only the first line after the date line can be the list of
              # branches; each branch contributes its first revision.
              my $branches = $1;
              $branches =~ s/;//g;
              foreach my $branch (split(/\s+/, $branches))
                {
                  if ($PureRCS)
                    {
                      # NOTE(review): $tok and $rev are never assigned in
                      # this routine, so this stores an undefined value
                      # under the key "." - presumably $branch was meant;
                      # left unchanged pending confirmation.
                      my ($l, $b, $r) = ($tok =~ /(.*)\.(\d+)\.(\d+)$/);
                      my $tag = "$l.$b";
                      $RCS_Branchtags{$tag} = $rev;
                    }
                  if ($rev_branches ne "") { $rev_branches .= " "; }
                  $rev_branches .= "$branch.1";
               }
            }          
          else
            { $rev_msg .= "$_"; }
          $mode = "revmsg1";
        }
      else
        {
          die "assert can't get here";
        }
    }

  # Done with rlog.  (This used to be a close of a handle named RCS that is
  # never opened, placed after the $Prescan return, which left the pipe
  # open until the next call reopened RLOG.)
  #
  close RLOG;

  # OK, we get here having seen every tag in the file. See whether,
  # for any of the tags we saw in this file, we can determine the
  # branch for that tag, and remember the mapping globally.

  try_tag: foreach my $tag (sort(keys(%RCS_Tags)))
    {
      # Once UNMAPPED, always UNMAPPED...  (really means a collision
      # for this tag happened previously), so no need to go on with it
      # for this file.
      # 
      if ($Tags{$tag} =~ /^UNMAPPED/) { next try_tag; }

      # Only want to consider rev tags:
      #
      if (defined($RCS_Branchtags{$tag})) { next try_tag; }

      # Throw away stuff on the exclude list...

      if (&exclude($RCS_File, $tag)) { next try_tag; }

      my $tagrev = $RCS_Tags{$tag};
      my (@sel_brs) = &potential_branches($tag);

#      # Branch 1.1.1 may need to be either "import" or "main",
#      # depending on whether this file has "branch: 1.1.1" set,
#      # so it gets special handling:
#
#      #### "potential import" case:
#      #
#      if ($tagrev =~ /^1\.1\.1\.(\d+)$/)
#        {
#          #  If there are no branch tags that select this rev,
#          #  as a branch-point, use "main"; If exactly one,
#          #  use it; If more than one, don't decide. 
#
#          my $sel_br;
#
#          if ($#sel_brs < 0)
#            {
#              if ($RCS_import_is_main)
#                { $sel_br = "main"; }
#              else
#                { $sel_br = "import"; } # TBD
#            }
#          elsif ($#sel_brs == 0)
#            { $sel_br = $sel_brs[0]; }
#
#          if ($sel_br) { &set_mapping($tag, $sel_br); }
#
#          next try_tag;
#        }

      #### "non-potential import" case:
      #

      # So: do any of the known branch tags in this file select the
      # tagged revision?
      #
      my $tagrev_brnum;
      ($tagrev_brnum = $tagrev) =~ s/\.\d+$//;

      # The heuristic only applies when exactly one candidate was found.
      if ($#sel_brs != 0) { next try_tag; }

      my $sel_br = $sel_brs[0];

      if ($RCS_import_is_main && $sel_br eq "main" && $RCS_Tags{$tag} =~ /^1\.1\.1\.\d+$/)
        { $Tags{$tag} = "main"; }

      # so... if we get here, in theory, the heuristic was applicable;
      # exactly one branch tag selected this branch point. $sel_br has
      # the presumptive mapping.

      # Now, make sure we didn't get a different answer than from some previous
      # file:
      #
      &set_mapping($tag, $sel_br);
    }
      
  if ($Prescan) { return 1; }

  if ($adamci)
    {
      # The directory carries a ".adamci" file: check it out and evaluate
      # it as Perl, which is expected to fill %metadata.
      # NOTE(review): this runs whatever code the repository file holds.
      #
      undef %metadata;
      eval `$CO -q -p $repdir/.adamci,v`; # TBD: Optimize this to only reread if new dir?

      foreach my $p (keys(%{$metadata{'context_rules'}}))
        {
          my $k;
          ($k) = keys( %{${${$metadata{'context_rules'}}{$p}}{$File}}  );

          if (defined($k) && $k ne "TBB")
            {
              # Rewrite a.b.n as a.b.0.n and record it as a branch tag.
              my $rev = ${${${${$metadata{'context_rules'}}{$p}}{$File}}{$k}}[1];
              $rev =~ s/^(.*\.\d+)\.(\d+)$/$1.0.$2/;
              $RCS_Tags{$p} = $rev;
              $RCS_Branchtags{$p} = $rev;
            }
        }
    }

  $RCS_Valid = 1;

  return 1;
}


#  rcs_tip(REV) - follow the "next" links recorded in %RCS_Revs, starting at
#  REV, and return the last revision of that chain.
#
#  Each %RCS_Revs value has the form "next:branches"; a chain ends at the
#  revision whose "next" part is empty.  Returns "???" as soon as a revision
#  on the way has no %RCS_Revs entry.
#
sub rcs_tip
{
  my ($rev) = @_;

  while (defined($RCS_Revs{$rev}))
    {
      my ($next) = split(/:/, $RCS_Revs{$rev});
      return $rev if $next eq "";
      $rev = $next;
    }

  return "???";
}


#  rev_on_line(SPEC) - given a "CVS line spec" (revision #, "head", or a
#  tag), return the revision it currently selects in this file, or "none".
#
#  $TRUNKLINE is treated as "head".  A spec found in %RCS_Tags is replaced
#  by its revision; any other spec must consist of digits and dots only.
#
sub rev_on_line
{
  my ($spec) = @_;

  $spec = "head" if $spec eq $TRUNKLINE;

  if (defined($RCS_Tags{$spec}))
    { $spec = $RCS_Tags{$spec}; }
  elsif ($spec !~ /^[0-9.]+$/)
    { return "none"; }

  if ($spec =~ /\.0\.([0-9]+)$/)
    {
      # A CVS branch number (x.y.0.n): drop the 0 to get the RCS branch
      # and look for its first revision, assumed to be always ".1".
      #
      (my $first = $spec) =~ s/\.0(\.[0-9]+)$/$1/;
      $first = "$first.1";

      # The branch has revisions: it *is* a branch, so go out to its tip.
      return &rcs_tip($first) if defined($RCS_Revs{$first});

      # Nothing was ever committed on it: fall back to the revision the
      # branch is rooted at, which is known to exist.
      $first =~ s/\.[0-9]+\.[0-9]+$//;
      return $first;
    }

  #  Otherwise the dot count tells an RCS revision from an RCS branch.
  #
  my $dots = ($spec =~ tr/.//);

  if ($dots % 2)
    {
      #  Odd number of dots: a revision number; it must exist in the file.
      return defined($RCS_Revs{$spec}) ? $spec : "none";
    }

  #  Even number of dots: a branch number (first rev assumed to be ".1").
  return &rcs_tip("$spec.1");
}


#  rev_lt(THIS, THAT) - is rev THIS < rev THAT?
#
#  Note: "" (any false THAT) is considered infinitely high, so the answer
#        is then always 1.
#        Revs must be of the same order (I.e., same # of "."s).
#
#  Fields are compared numerically from the left; the first difference
#  decides.  Returns 1 for "less than", 0 otherwise (including equality).
#
sub rev_lt
{
  my ($this, $that) = @_;

  if (! $that) { return 1; }

  my @this = split(/\./, $this);
  my @that = split(/\./, $that);

  while (1)
    {
      # Lexicals: these used to be assigned as package globals.
      my $this_n = shift(@this);
      my $that_n = shift(@that);

      if ($this_n < $that_n) { return 1; }
      if ($this_n > $that_n) { return 0; }

      # All fields of THIS consumed without finding a difference.
      if ($#this < 0) { return 0; }
    }
}


#  linerev_gt(THIS, THAT) - does THIS rank above THAT when picking the next
#  line to process?
#
#  Note: "" (any false THAT) is considered infinitely high... yet the
#        answer is then 1, which lets a first candidate always win.
#
#  Otherwise the rev with FEWER dots ranks higher; with the same number of
#  dots the result is rev_lt(THAT, THIS).  Returns 1 or 0.
#
sub linerev_gt
{
  my ($this, $that) = @_;
  my $ret;

  if (! $that)
    { $ret =  1; }
  else
    {
      # Parenthesized so that BOTH are lexical ("my $a, $b" declares $a only).
      my ($thisord, $thatord);
      $thisord = ($this =~ tr/\././);
      $thatord = ($that =~ tr/\././);
      if ($thisord < $thatord)
        { $ret = 1; }
      elsif ($thisord > $thatord)
        { $ret = 0; }
      else
        { $ret = &rev_lt($that, $this); }
    }
  return $ret;
}


# Maximum size for a log message we'll keep.
# Messages beyond this get truncated, to accommodate a limitation
# on the key/value pair size in ndbm. That's life.
#
$MAXSZ = 256*3;


#  dofile(DIR, FILE) - generate the metadata for a single file.
#
#  This is the per-file callback handed (by name) to traverse().  FILE is
#  skipped silently unless it ends in ",v" and does not match $IGNOREFILES,
#  and with a message when its name holds control/8-bit characters or "...".
#
#  For an accepted file it
#    - loads the RCS tables with set_RCS_revs;
#    - dies if the same path (ignoring an "Attic" directory) was seen before;
#    - returns at that point when $Prescan is set;
#    - otherwise works out which codeline each revision belongs to and
#      writes one $S-separated record per exported revision to METATMP,
#      storing the (possibly truncated) log message in %MSGS.
#
#  Apart from $dir and $file (localized) and a few lexicals, the working
#  variables are package globals.
#
sub dofile
{
  local($dir, $file) = @_;

  if ($file !~ /,v$/) { return; }
  if ($IGNOREFILES && $file =~ /$IGNOREFILES/) { return; }

  if ($file =~ /[\000-\037\177-\377]/)
    {
      # Show the offending name with the unprintable characters in octal.
      print "$Myname: RCS filename with non-printable characters (skipped): ";
      foreach my $c (split(//, $file))
        {
          if ($c =~ /[\000-\037\177-\377]/)
            { printf "\\%03o", ord($c); }
          else
            { print "$c"; }
        }
      print "\n";
      return;
    }

  elsif ($file =~ /\.\.\./)
    {
      print "$Myname: RCS filename with illegal Perforce characters (skipped): $file\n";
      return;
    }

  undef %RCS_lines;
  undef %RCS_Branches;
  undef $Firstusedrev;

  print "========== $dir/$file";

  # This parses the RCS information from the ,v file, filling
  # in various data structures that we use, below.
  #
  # (A plain truth test; this used to be a numeric comparison with undef.)
  #
  if (! &set_RCS_revs("$dir/$file", 0)) { print " (empty)\n"; return; } # empty ,v

  # What RCS keyword expansion options are in effect?
  # (We use this to detect binary files)
  #
  $options = "${RCS_expand}$RCS_exec";
  if (! $options) { $options = "-"; }

  # $path keeps the directory as given; $na_path is the same with a final
  # "Attic" directory component removed.  Both lose the ",v" suffix.
  #
  @path = split(/\//, "$dir/$file");
  $file = pop(@path);
  $file =~ s/,v$//;
  if ($path[$#path] eq "Attic") { pop @path; }
  $na_dir = join("/", @path);
  $path = sprintf("%s%s%s", $dir, $dir ? "/" : "", $file);
  $na_path = sprintf("%s%s%s", $na_dir, $na_dir ? "/" : "", $file);

  print " ok\n";

  # The "defined($Filesseen{$path})" saves lots of stat()s!
  #
  if (defined($Filesseen{$path}))
    {
      my @p = split(/\//, "$na_path");

      splice(@p, $#p, 0, "Attic");
      my $a_path = join("/", @p);

      #  Users have seen this, which previously caused mysterious death
      #  in the sort phase... let's be a little more informative:
      #
      if (-f "$na_path,v" && -f "$a_path,v")
        { die "assert: CVS repository has both\n  $na_path\nand\n  $a_path"; }
      else
        { die "assert: dofile(): duplicate path: $na_path"; }
    }

  $Filesseen{$path} = 1;

  if ($Prescan) { return; }

  # For all of the branches we see, store the tip revision in
  # $HAVELINES{$line}; this is also where we weed out
  # codelines we are not interested in.
  #
  foreach $line ((keys %RCS_Branchtags), $TRUNKLINE)
    {
      # Note: lines added to the exclude_branches file should
      # give the actual, complete branch tag name, not the 
      # "de-flashed" (if any) rendition.
      #
      if (&exclude("$dir/$file", $line, 1)) { next; }

      $no_flash_line = $line;
      if (defined($BRANCH_FLASH)) { $no_flash_line =~ s/$BRANCH_FLASH$//; }
      if ($WANTLINES && ! (defined($WANTLINES{$no_flash_line}))) { next; }

      if (($tiprev = &rev_on_line($line)) eq "none") { next; }

      $HAVELINES{$line} = $tiprev;
    }

  #  Now we go through each line, to build a list of the RCS revs that
  #  need to be exported into the metadata stream.
  #
  while (1) # We have more lines to deal with...
    {
      (@k) = (keys %HAVELINES);
      if ($#k < 0) { last; }
      my $theline;

      #  Choose the highest numbered line of the lowest "order" for
      #  the next one to export... this will always pick up lines on
      #  branches nearer the trunk first, so the subsequent branches
      #  will have a place to branch from!
      #
      foreach $k (@k)
        {
          if ($k eq $TRUNKLINE) { $theline = $k; last; }

          #  if both lines select the *same* revision...
          #
          if ($HAVELINES{$k} eq $HAVELINES{$theline})
            {
              # ...take the one with the lower branch tag order first
              #
              if (&linerev_gt($RCS_Branchtags{$k}, $RCS_Branchtags{$theline}))
                { $theline = $k; next; }
            }

          if (&linerev_gt($HAVELINES{$k}, $HAVELINES{$theline}))
            { $theline = $k; }
        }

      $rev = $tiprev = &rev_on_line($theline);

      $t = $theline;

      if (defined($BRANCH_FLASH))
        { $t =~ s/$BRANCH_FLASH$//; }

      #  This is where we build the list of codelines we've encountered.
      #
      $All_lines{$t} = 1;

      if (  (defined($RCS_lines{$rev})) &&
            ($theline ne $TRUNKLINE) &&
            (($rev =~ tr/\././) < ($RCS_Branchtags{$theline} =~ tr/\././)))
        {
          # The tip is already owned by another line and this branch has
          # no revisions of its own: just note it as branching from there.
          if ($RCS_Branches{$rev}) { $RCS_Branches{$rev} .= ":"; }
          $RCS_Branches{$rev} .= $t;
        }
      else
        {
          # Claim every still unowned revision from the tip backwards.
          while ($rev && ! defined($RCS_lines{$rev}))
            {
              $RCS_lines{$rev} = $theline;

              # if we are looking at 1.1.1.n, and its commit time is less
              # than any 1.2, or there isn't a 1.2 present, then add a "+"
              # to $RCS_lines{$rev}, to so indicate to later stages...
              #
              # The dates are zero-padded "YYYY.MM.DD.hh.mm.ss" strings, so
              # they must be compared as strings: a numeric "<" only looks
              # at the leading "YYYY.MM".
              #
              if ($rev =~ /^1\.1\.1\.(\d+)$/ && $1 >= 2 &&
                    ((! defined($RCS_Revs{"1.2"}) || $RCS_Dates{$rev} lt $RCS_Dates{"1.2"})))
                { $RCS_lines{$rev} .= "+"; }
                
              $rev = $RCS_Prevs{$rev};
            }
          if ($rev)
            {
              # Stopped at a revision owned by another line: that is
              # where this line branches off.
              if ($RCS_Branches{$rev}) { $RCS_Branches{$rev} .= ":"; }
              $RCS_Branches{$rev} .= $t;
            }
        }

      # We test for "if $rev" here cause it may have gone null if the while loop
      # above ran off the end...
      #
      if ($rev && (($rev =~ tr/\././) == 1) && &rev_lt($rev, $Firstusedrev))
        { $Firstusedrev = $rev; }

      delete $HAVELINES{$theline};
    }


  #  OK, we have the set of revisions to export - write them to the
  #  metadata stream.
  #
  foreach $rev (keys %RCS_Revs)
    {
      $revkey = "$path/$rev";

      $state = $RCS_States{$rev};
      $author = $RCS_Authors{$rev};
      $date = $RCS_Dates{$rev};
      my ($yr, $mo, $da, $hr, $mi, $se) = split (/\./, $date);
      $date = timegm($se,$mi,$hr,$da,$mo - 1,$yr);
      $line = $RCS_lines{$rev};
      if (defined($BRANCH_FLASH)) { $line =~ s/$BRANCH_FLASH$//; }
      $branches = $RCS_Branches{$rev};

      #  Detect revisions before the first branch point, and
      #  omit them if we're not doing ALLTHEWAYBACK.
      #
      if ( (! $ALLTHEWAYBACK)
           && ($line eq $TRUNKLINE)
           && ($rev ne $RCS_Tags{"head"})
           && $Firstusedrev
           && &rev_lt($rev, $Firstusedrev))
             { next; }

      # Revisions that no wanted line claimed are not exported.
      if (! $line) { next; }
      if (! $branches) { $branches = "-"; }

      if ((! $ALLTHEWAYBACK) && $rev eq $Firstusedrev)
        { $prevrev = "-"; }
      elsif ($RCS_Prevs{$rev})
        { $prevrev = $RCS_Prevs{$rev}; }
      else
        { $prevrev = "-"; }

      $All_lines{$line} = 1;

      if ($RCS_import_is_main)
        {
          # Split $branches on single spaces and join the non-leading
          # pieces back with single spaces.
          my @btmp = split(/ /, $branches);
          my $newb = "";
          foreach my $b (@btmp)
            {
              if ($newb) { $newb .= " "; }
              $newb .= $b;
            }
          $branches = $newb;
        }

      my ($revpath, $revnum) = ($revkey =~ m/^(.*)\/([^\/]*)$/);

      print METATMP "$revkey$S$date$S$author$S$state$S$line$S$RCS_import_branch$S$branches$S$prevrev$S$options\n";

      #  MAXSZ derives from a ndbm limitation on the size of a key/entry pair.
      #  at (256*3) it allows for a $revkey up to 250 chars or so.
      #
      $logmsg = substr($RCS_Logs{$rev}, 0, $MAXSZ);
      if ($logmsg !~ /\n$/) { $logmsg .= "\n"; }

      if (length($logmsg)+length($revkey) > 1010)
        { print "$Myname: revkey + log too long for <$revkey>\n"; exit 1; }

      if ($RCS_import_is_main && $logmsg eq "Initial revision\n")
        {
          # Prefer the log of the file's 1.1.1.1, if it was stored already.
          my $logkey = "$revpath/1.1.1.1";
          if (defined($MSGS{$logkey})) { $logmsg = $MSGS{$logkey}; }
        }
      $MSGS{$revkey} = $logmsg;
    }
}

# option switch variables get defaults here...

#  Command line handling.  Exactly one non-option argument is required; it
#  becomes $Convdir.  Recognized options:
#
#    -prescan    set $Prescan
#    -valopt N   set $Valopt to N (usage error if N is missing or negative)
#    -help       print the help text and exit
#    -testtoks   call test_rcstoks (defined elsewhere)
#
#  Any other argument starting with "-" is a usage error.
#
$Convdir = "";

$Boolopt = 0;
$Valopt = 0;
$Prescan = 0;

while ($#ARGV >= 0)
  {
    # NOTE(review): if test_rcstoks returns, "-testtoks" then falls through
    # to the unknown-option case below, as it always has.
    if ($ARGV[0] eq "-testtoks")   { &test_rcstoks; }
    if ($ARGV[0] eq "-prescan")    { $Prescan = 1; shift; next; }
    elsif ($ARGV[0] eq "-valopt")
      {
        shift;
        if (! defined($ARGV[0]) || $ARGV[0] < 0) { &usage; }
        $Valopt = $ARGV[0]; shift; next;
      }
    elsif ($ARGV[0] eq "-help")
      { &help; }
    elsif ($ARGV[0] =~ /^-/) { &usage; }

    push(@Args, $ARGV[0]);
    shift;
  }

# Numeric test on the element count (this was a string "ne" on $#Args).
if (@Args != 1) { &usage; }

$Convdir = $Args[0];

#  Names of the work files, all directly below $Convdir; each variable is
#  named after its file.  $Clientdir is the "p4" entry of the same directory.
#
($Metatmp, $Metadata, $Labels, $Tags, $Tagfiles, $Brtags, $Logmsgs, $Filesseen,
 $Changes, $Revmap) =
  map { "$Convdir/$_" }
    qw(metatmp metadata labels tags tagfiles brtags logmsgs filesseen
       changes revmap);

$Clientdir = "$Convdir/p4";

require "$Convdir/config";

#  set_RCS_revs runs $Mydir/bin/rlog on every file; give up right away,
#  with an explanation, when no executable is installed there.
#
unless (-x "$Mydir/bin/rlog")
  {
    print <<EOM;
$Myname:

  *** This version of $Myname requires a patched version of the RCS
  *** rlog command to be built and installed in $Mydir/bin. Please
  *** see the src/rcs-5.7/src/README and rlog.c.patch files included
  *** in this distribution for further information.

EOM
    exit(1);
  }


#  Load the exclusion tables consulted by exclude().
#  NOTE(review): load_excludes is not defined in this part of the file;
#  presumably it fills %Exclude_tags and %Exclude_branches - confirm.
&load_excludes();

#  Remove what an earlier run may have left behind.
#  (Handle either f or f.db or f.pag, f.dir style dbs):
#
#  The plain "$Brtags" name is on the list, just like "$Tags" and
#  "$Tagfiles": the branch tag database is tied again below, and a file
#  surviving from an aborted run would bring its old keys along.
#
#  NOTE(review): s() is defined elsewhere; it is assumed to run the command
#  and return a true value on failure.
#
if (&s("/bin/rm -rf $Logmsgs $Logmsgs.db $Logmsgs.pag $Logmsgs.dir ".
    "$Changes $Clientdir ".
    "$Tags.txt $Tags $Tags.db $Tags.pag $Tags.dir ".
    "$Tagfiles.txt $Tagfiles $Tagfiles.db $Tagfiles.pag $Tagfiles.dir ".
    "$Brtags.txt $Brtags $Brtags.db $Brtags.pag $Brtags.dir ".
    "$Revmap $Revmap.db $Revmap.pag $Revmap.dir $Labels"))
  { die "/bin/rm -rf $Logmsgs ..."; }

use DB_File;
#  All on-disk hashes are DB_File databases opened with the same BTREE
#  parameters.  Failing to open any of them, or either output file, is
#  reported on STDOUT and ends the run with exit status 1.
#
$DBMCLASS = "DB_File";

#$myhashinfo = new DB_File::HASHINFO;
#$myhashinfo->{bsize} = 4096;

$myhashinfo = DB_File::BTREEINFO->new();

#  NOTE(review): the hash tied here is %Files_seen, whereas dofile() keeps
#  its record of visited paths in %Filesseen - confirm which is intended.
#
unless (tie(%Files_seen, $DBMCLASS, $Filesseen, O_CREAT|O_RDWR, 0666, $myhashinfo))
  {
    print "$Myname: can't tie \"$Filesseen\": $!\n";
    exit 1;
  }

#  %MSGS: log messages, keyed by "path/revision" (filled by dofile).
#
unless (tie(%MSGS, $DBMCLASS, $Logmsgs, O_CREAT|O_RDWR, 0666, $myhashinfo))
  {
    print "$Myname: can't tie \"$Logmsgs\": $!\n";
    exit 1;
  }

#  LABELS: one line per revision tag, written by set_RCS_revs.
#
unless (open(LABELS, ">$Labels"))
  {
    print "$Myname: can't open \">$Labels\": $!\n";
    exit 1;
  }

#  The %Tags hash is keyed by the tag name. Its value is set to the
#  branch tag of the branch it belongs to, iff the mapping can be
#  determined by observing that a tagged revision is present in
#  exactly one branch, i.e., has moved beyond the branch's branch
#  point, AND is not selected as the base of some other branch.
#
unless (tie(%Tags, $DBMCLASS, $Tags, O_CREAT|O_RDWR, 0666, $myhashinfo))
  {
    print "$Myname: can't tie \"$Tags\": $!\n";
    exit 1;
  }

#  The %Tagfiles hash remembers, for each tag mapped by the heuristic,
#  the file where the mapping was established. This is useful in cases
#  where there are conflicts.
#
unless (tie(%Tagfiles, $DBMCLASS, $Tagfiles, O_CREAT|O_RDWR, 0666, $myhashinfo))
  {
    print "$Myname: can't tie \"$Tagfiles\": $!\n";
    exit 1;
  }

#  The %Brtags hash is keyed by branch tag name; set_RCS_revs stores 1 for
#  every branch tag it sees.
#
unless (tie(%Brtags, $DBMCLASS, $Brtags, O_CREAT|O_RDWR, 0666, $myhashinfo))
  {
    print "$Myname: can't tie \"$Brtags\": $!\n";
    exit 1;
  }

#  METATMP: the unsorted metadata records written by dofile.
#
unless (open(METATMP, ">$Metatmp"))
  {
    print "$Myname: can't open \">$Metatmp\": $!\n";
    exit 1;
  }

#chdir $CVS_MODULE || die "$Myname: can't chdir \"$CVS_MODULE\": $!";
#$CVS_MODULE = `/bin/pwd`; chop $CVS_MODULE;
#chdir $Here || die "$Myname: can't chdir \"$Here\": $!";

#  Walk the repository below $CVS_MODULE; "dofile" is passed by name as the
#  routine to run on the files found (traverse is defined elsewhere).
#
&traverse($CVS_MODULE, 0, "dofile");

#  Buffered write errors (a full disk, say) only show up at close time, so
#  the result is checked: a truncated metadata file must not go unnoticed.
#
if (! close(METATMP))
  { print "$Myname: error closing \"$Metatmp\": $!\n"; exit 1; }
#close REVTAGS;
untie %MSGS;

if (! open(TAGS, ">$Tags.txt"))
  { print "$Myname: can't open \">$Tags.txt\": $!\n"; exit 1; }
foreach $tag (sort(keys(%Tags)))
  { 
    print TAGS "$tag\t";
    if ($Tags{$tag})
      { print TAGS "$Tags{$tag}\n"; }
    else
      { print TAGS "UNMAPPED-NOTFOUND\n"; }
  }

close TAGS;
print "Wrote $Tags.txt\n";
untie %Tags;
untie %Tagfiles;
&s("rm -f $Tags $Tags.db $Tags.pag $Tags.dir");
&s("rm -f $Tagfiles $Tagfiles.db $Tagfiles.pag $Tagfiles.dir");

if (! open(BRTAGS, ">$Brtags.txt"))
  { print "$Myname: can't open \">$Brtags.txt\": $!\n"; exit 1; }
foreach my $brtag (sort(keys(%Brtags))) { print BRTAGS "$brtag\n"; }
close BRTAGS;
print "Wrote $Brtags.txt\n";
untie %Brtags;
&s("rm -f $Brtags $Brtags.db $Brtags.pag $Brtags.dir");

if ($Prescan) { exit 0; }


#  metasort - comparison routine, for use as "sort metasort @records"
#
#  Compares the two metadata records found in $a and $b (as set by
#  sort). Each record is split on $S, and only the first field is
#  examined: everything up to its last "/" is taken as the pathname,
#  and what follows is taken as the dotted RCS revision number.
#
#  Ordering:
#    1. pathname, compared as a string;
#    2. revision number, compared numerically one pair of components
#       (branch.level) at a time; when one revision is a leading
#       part of the other, the shorter one sorts first.
#
#  The revision time (second field) is NOT compared here; that
#  ordering is left to the external sort.
#
#  Returns a negative, or positive value in the manner of cmp/<=>.
#  Dies if a revision number lacks the second half of a pair that has
#  to be examined, or if the two records compare equal (which is not
#  supposed to be possible).
#
sub metasort
{
  my @a = split(/$S/, $a);
  my @b = split(/$S/, $b);

  # The revision time is the primary sort key
  #   - But this is now handled by the external sort; we still
  #     do secondary and tertiary sort keys, below
  #
  #  if ($a[1] != $b[1]) { return $a[1] <=> $b[1]; }

  # Peel the pathname off the front of field 0, leaving the revision
  # number behind. The result of the substitution is tested before $1
  # is used: after a failed match $1 still holds whatever an earlier
  # match captured (for $b, that would be $a's pathname), so a field
  # with no "/" in it must yield an empty pathname explicitly.
  #
  my $apath = ($a[0] =~ s/^(.*)\///) ? $1 : "";
  my $bpath = ($b[0] =~ s/^(.*)\///) ? $1 : "";

  # Next is the pathname
  #
  if ($apath ne $bpath) { return $apath cmp $bpath; }

  # If we're still tied, it goes to the revision number!
  # (Lexicals, so that a comparison leaves no package globals behind.)
  #
  my @arev = split(/\./, $a[0]);
  my @brev = split(/\./, $b[0]);

  for (my $i = 0; $i <= $#arev; $i += 2)
    {
      # a has more positions than b, thus greater
      #
      if (! defined($brev[$i])) { return 1; }

      my $order = $arev[$i] <=> $brev[$i];
      if ($order) { return $order; }

      # if they are equal, we look to the next position; both sides
      # must have one, since RCS revisions come in pairs:
      #
      if (! defined($arev[$i+1]))
        { die "impossible sort key (RCS rev) \"$a[0]\"?\n"; }
      if (! defined($brev[$i+1]))
        { die "impossible sort key (RCS rev) \"$b[0]\"?\n"; }

      $order = $arev[$i+1] <=> $brev[$i+1];
      if ($order) { return $order; }

      # Otherwise, we go on to the next level...
    }

  # All of a's positions matched; if b goes on, a is the lesser.
  #
  if ($#brev > $#arev) { return -1; }

  die "impossible equal sort keys:\n  <$a>\n  <$b>\n";
}

#  Produce the final $Metadata file from the unsorted $Metatmp.
#
#  The primary ordering (numeric, on the second $S-separated field,
#  the revision time) is done by an external sort, whose output is
#  read through a pipe. Records sharing one value of that field are
#  then gathered and ordered among themselves with metasort.
#
my $cmd = "sort -n -t $S -k 2 < $Metatmp |";

if (! open(METASORT, $cmd))
  { print "$Myname: can't open \"$cmd\": $!\n"; exit 1; }

if (! open(META, ">$Metadata"))
  { print "$Myname: can't open \">$Metadata\": $!\n"; exit 1; }

# Do the sorting in chunks, per primary sort key. (We're going through
# all of this, BTW, in order to constrain genmetadata's memory
# footprint, which was getting huge when we held all of the tags and
# metadata in-core)
#

my $t = 0;    # primary key of the chunk being gathered
my @Meta;     # the records of that chunk

#  Write the gathered chunk to META in metasort order, and start a
#  fresh one. Does nothing when no records have been gathered.
#
my $flush_chunk = sub
{
  if ($#Meta < 0) { return; }
  foreach my $m (sort metasort @Meta) { print META "$m\n"; }
  @Meta = ();
};

while (<METASORT>)
  {
    chomp;
    my @r = split(/$S/, $_);
    if ($r[1] ne $t)
      {
        # The primary key has changed: the previous chunk is complete.
        #
        $flush_chunk->();
        $t = $r[1];
      }
    push (@Meta, $_);
  }

#  Don't forget the final chunk.
#
$flush_chunk->();

#  Closing a pipe waits for the command and fails if it exited with a
#  non-zero status ($! is then 0 and the status is in $?). A failed
#  sort means the metadata just written is incomplete, so say so
#  rather than carrying on.
#
if (! close(METASORT))
  {
    print "$Myname: \"$cmd\" failed: " . ($! ? "$!" : "exit status $?") . "\n";
    exit 1;
  }

#  Buffered write errors (a full disk, say) may only show up here.
#
if (! close(META))
  { print "$Myname: can't close \"$Metadata\": $!\n"; exit 1; }

#  Report every key of %All_lines, in sorted order, both on standard
#  output and in $Convdir/lines. Being unable to create that file is
#  reported but is not treated as fatal: the exit status is 0 either way.
#
$Lines = "$Convdir/lines";
if (open(LINES, ">$Lines"))
  {
    print("===== Lines referenced:\n");
    print LINES "===== Lines referenced:\n";
    foreach my $ref (sort keys %All_lines)
      {
        print("$ref\n");
        print LINES "$ref\n";
      }
    close LINES;
  }
else
  {
    print "$Myname: can't open \">$Lines\": $!\n";
  }

exit 0;

#	Change	User	Description
#81	6301	Richard Geiger	These changes resulted from a recent customer conversion. A checkpoint, more or less.
#80	5888	Richard Geiger	This fixes a subtle and heinous bug whereby imports done before a file gets a local change on "main" will be lost (even though the local change will reflect the contents of those "lost" revisions) cc: releng@ironport.com, ehuss@ironport.com
#79	5811	Richard Geiger	I believe this changes handles the problem encountered by Henry Grishashvili at IC Manage.
#78	5673	Richard Geiger	checkpoint ironport latest.
#77	5667	Richard Geiger	fix a/na dup detection.
#76	5656	Richard Geiger	Another fix for cases where TRUNKLINE is not head.
#75	5654	Richard Geiger	Take care of David Birkhead's first two problem children :-)
#74	5651	Richard Geiger	Cosmetic only
#73	5647	Richard Geiger	fixes for case of trunkline set to a branch (! the trunk) where a file is initially added.
#72	5633	Richard Geiger	Handle the "filename" attribute that some RCS/CVSes apparently create these days... Also, be sure to wipe the Convdir/p4root before genmetadata does it's thing, so as to remove any ,v files that genmetadata might find there. Also for general hygiene!
#71	5615	Richard Geiger	more depot mapping fixes.
#70	5588	Richard Geiger	checkpoint the latest. This includes a rework of the label-heursitical stuff that seems to work better.
#69	5587	Richard Geiger	Handle \r's in filenames by xlating to "%0d".
#68	5585	Richard Geiger	do complete branch determination in bin/genmetadata now... heh.
#67	5583	Richard Geiger	Handle "..." in CVS pathnames by changing them to ",,,"s. "Works for me!"
#66	5581	Richard Geiger	pull out unintended leftover debug cruft.
#65	5580	Richard Geiger	Tweaks & debugging fixes from the IP 2006/07/06 trial.
#64	5570	Richard Geiger	decruftification.
#63	5568	Richard Geiger	If we are doing BRANCH_FLASH, do it consistently BEFORE using the un-de-flashed (!) value anywhere else!
#62	5567	Richard Geiger	Seems pretty close to right at this point. This change makes sure that we handle labels on "ambiguous" revisions (any 1.1.1.n, n > 1 revisions commited before any 1.2) as being present in both the import branch AND main correctly.
#61	5563	Richard Geiger	Life is a corner case. "UNMAPPED-COLLISION tags in tags.txt now indicate what collided better. Fix srcdiff to handle odd Log expansion corner case that was causing flase positives.
#60	5555	Richard Geiger	NOEXC_PATHNAME
#59	5531	Richard Geiger	A significant checkpoint commit, with new improved handling of import vendor branches, and revisions present in main by virtue of multiple vendor drops to a file with no local mods. test/runtest works, with new refernece results pretty well scrutinized.
#58	5500	Richard Geiger	Fix handling of special chars in filenames with new rlog-based parsing.
#57	5498	Richard Geiger	Eeeks, where did this line go!?
#56	5497	Richard Geiger	checkpoint
#55	5495	Richard Geiger	Slight change to parsing rlog output, for better tolerance of people pasting rlog output into log messages. This also depend on the new rlog...
#54	5494	Richard Geiger	Heck, it's turning into 3.0! This is a watershed commit - switch to rlog-based ,v parsing...
#53	5490	Richard Geiger	$Depotmap implemented.
#52	5489	Richard Geiger	New checkpoint; now has multi-mod exclude_* files.
#51	5485	Richard Geiger	good checkpoint
#50	5484	Richard Geiger	checkpoint
#49	5483	Richard Geiger	The latest formula... ...and a typo.
#48	5480	Richard Geiger	Better vendor-branch handling... it's tricky! genmetadata now includes a flag in each line of the generated $Convdir/labels to indicate whether the file in question had "$Rcs_import_is_main" set. (The file had "branch: 1.1.1" as the default branch).
#47	5477	Richard Geiger	Fix bug in prescan mode.
#46	5476	Richard Geiger	I am import, hear me roar.
#45	5466	Richard Geiger	allow leading whitespace and/or trailing # comments in exclude_* lists.
#44	5465	Richard Geiger	Add exclude list for tags & branches.
#43	5442	Richard Geiger	A checkpoint commit on the way to a 2.6.0 release with the new IronPort inspired improvements.
#42	5437	Richard Geiger	just another checkpoint. Passed test/runtest, but still has debugging cruft in. Not fit for release!
#41	5430	Richard Geiger	This is another "checkpoint" commit. It significantly rearranges how labels are done, so as to use a hueristic to divine label<->branch identifications. Not intended for release without further testing and tweakage!
#40	5428	Richard Geiger	A checkpoint commit; this adds a first stab attempt at a global (across all files) hueristic for determining the tag<->branchtag mapping. It seem to be working, but has not been deeply tested yet at all. There is also debugging cruft that should be removed before this goes into a release. Also, ALL of the big (per-file or more) hashes now get stored as tie'ed databases, in order to deal with memory exhaustion when dealing with BIG data sets.
#39	5426	Richard Geiger	Add compilation of the tags and branch tags encountered.
#38	5392	Richard Geiger	Adjust sort key specifier option to avoid the archiaic form.
#37	5272	Richard Geiger	Allow "."s in author identifiers
#36	5199	Richard Geiger	hush $CO .adamci
#35	5198	Richard Geiger	First cut "ADAM" support for Intel Austin.
#34	5143	Richard Geiger	prep for 2.5.5
#33	5091	Richard Geiger	Fix delta processing - some labels (with nonnumerics following numerics) could otherwise confuse the parsing.
#32	4917	Richard Geiger	in PureRCS, always use the RCS branch number in the depot path. even for the #1 revs (which really point to it in RCS).
#31	4914	Richard Geiger	Adds PureRCS switch.
#30	4732	Richard Geiger	Changes to support special characters # @ % * (for release 2.5)
#29	4296	Richard Geiger	Integrate Robert Cowham's fox for the "binary slows"... In Cowham we Trust!
#28	4270	Richard Geiger	Handle symbols name starting with a leading "num".
#27	3708	Richard Geiger	Changes for 2.3.6
#26	2376	Richard Geiger	First show at fixing RCS/"import" confusion...
#25	2284	Richard Geiger	Package 2.3.3. Changes to begin handling MKS Source Integrity repositories. Today MKS; Tomorrow... ClearCase! (well).
#24	2239	Richard Geiger	Ignore "ext" and "format" RCS keywords. These are apparently added by MKS's rendition of RCS. Ug.
#23	2061	Richard Geiger	changes for 2.3.2: - can adjust db hash bucket size; - Add $DEPOT config variable - Handle labels with '#' or '@'
#22	1987	Richard Geiger	Changes for 2.3.1
#21	1942	Richard Geiger	Change to handle RCS branch tags (so this tools can work with RCS (vs CVS) repositories, too!) RCS branch tags are those with an even number of "."s.
#20	1781	Richard Geiger	This change reintegrates cvs2p4 2.0 developement work (through 2.0b6) back into my mainline development.
#19	1437	Richard Geiger	Fix for 1.3.3 - labels on revived Attic files.
#18	1404	Richard Geiger	Oops, fix a bug ni the sort re-do from the last change: the external sort needs a -n. You'd think it would be smart enought to know what I want. Sheesh. :-)
#17	1388	Richard Geiger	Put genmetadata on a memory diet.
#16	1203	Richard Geiger	Fix bug where dolables couldn't cope with tag in which the revision for a file was a delete Add the IMPORTTAGSPOOF switch.
#15	1185	Richard Geiger	Changes for 1.3 (Labels!)
#14	1031	Richard Geiger	Changes for 1.2.17; fix one-letter id internal error bug.
#13	823	Richard Geiger	Add assert for dup d/f,v d/Attic/f,v (like "Giao Phan" <giao@seven.com> saw)
#12	474	Richard Geiger	Reject files with bad characters per perforce filenaming conventions.
#11	459	Richard Geiger	Now performs metadata sort using a sort routine coded directly in perl, rather than by using the host system's "sort" command. (Differences in "sort" behavior from one host to another had been observed to cause irregularities).
#10	416	Richard Geiger	Pull in Thomas Quinot <quinot@inf.enst.fr>'s UTC bugfix, for 1.2.12.
#9	398	Richard Geiger	Skip (and note) ,v files with nonprintable characters in the fileame.
#8	392	Richard Geiger	CHanges for 1.2.10 (tolerate empty RCS file)
#7	342	Richard Geiger	Allow for "." in "id" symbols.
#6	330	Richard Geiger	This change allows cvs2p4 to cope with RCS archives with CR/LF line endings. (I'm not sure how these get created; presumably some weird side effect of Bill Gates. But one user had 'em; RCS seems to cope with 'em, and so I've decided to make cvs2p4 follow suit.
#5	305	Richard Geiger	Changes for 1.2.7
#4	249	Richard Geiger	Changes in preparation for supporting spaces in filenames. (In fact, this may work as of this change, but is not yet tested.) Also, add "runtest -gengood" to allow easier generatino of new *.good files. (It just doesn't quick on a miscompare!).
#3	240	Richard Geiger	Version 1.2.5, to account for post-1999 RCS behavior. (Courtesy of David Simon, Goldman Sachs)
#2	179	Richard Geiger	CHanges for 1.2.3
#1	130	Richard Geiger	CVS-to-Perforce converter. This is release 1.2.2 (first submit to the Perforce Public Depot)