package VCP::Source::revml ;

=head1 NAME

VCP::Source::revml - Reads versioned files from a revml file

=head1 SYNOPSIS

   vcp revml[:<source>]
   vcp revml[:<source>] --dtd <dtd>

Where <source> is a filename for input; or missing or '-' for STDIN.

To compile a DTD in to a perl module:

   revml: --dtd <dtd> --save-doctype

=head1 DESCRIPTION

This source driver allows L<vcp|vcp> to read a RevML file.

For now, all revisions are fully reconstituted in the working
directory in order to make sure that all of the patches apply cleanly.
This can require a huge amount of disk space, but it works (optimizing
this is on the TODO).

=cut

use strict ;

use Carp ;
use Fcntl ;
use File::Spec ;
use Getopt::Long ;
use Digest::MD5 ;
use MIME::Base64 ;
use RevML::Doctype ;
use Symbol ;
use UNIVERSAL qw( isa ) ;
use XML::Parser ;
use Time::Local qw( timegm ) ;
use VCP::Debug ':debug' ;
use VCP::Patch ;
use VCP::Rev ;

use vars qw( $VERSION $debug ) ;

$VERSION = 0.1 ;

$debug = 0 ;

use base 'VCP::Source' ;

use fields (
   'COPY_MODE',  ## TRUE to do a copy, FALSE if not (like when writing .pm DTD)
   'DOCTYPE',
   'HEADER',     ## The $header is held here until the first <rev> is read
   'IN_FH',      ## The handle of the input revml file
   'IN_NAME',    ## The name of the input revml file, or '-' for STDIN
   'WORK_NAME',  ## The name of the working file (diff or content)
   'WORK_FH',    ## The filehandle of working file
   'REV',        ## The VCP::Rev containing all of this rev's meta info
   'STACK',      ## A stack of currently open elements
   'UNDECODED_CONTENT', ## Base64 content waiting to be decoded.
) ;


## new( $spec, $args )
##
## Creates a new instance.  The only parameter is '--dtd', which overrides
## the default DTD found by searching for modules matching RevML::DTD:v*.pm.
##
## Attempts to open the input file if one is specified; otherwise STDIN
## is used.  Dies if the input file cannot be opened.
##
## If the option '--save-doctype' is passed, then no copying of resources
## is done (COPY_MODE is cleared) and the doctype is saved as a .pm file.
## See L<RevML::Doctype> for details.
sub new {
   my $class = shift ;
   $class = ref $class || $class ;

   my VCP::Source::revml $self = $class->SUPER::new( @_ ) ;

   $self->{COPY_MODE} = 1 ;

   my ( $spec, $args ) = @_ ;

   my $parsed_spec = $self->parse_repo_spec( $spec ) ;

   my $save_doctype ;
   {
      ## GetOptions() works on @ARGV, so alias it to our argument list.
      local *ARGV = $args ;
      GetOptions(
         'dtd|version' => sub {
            ## The option is declared without a value, so pull the DTD
            ## name off the remaining arguments by hand.
            $self->{DOCTYPE} = RevML::Doctype->new( shift @$args ) ;
         },
         'save-doctype' => \$save_doctype,
      ) or $self->usage_and_exit ;
   }

   $self->{DOCTYPE} = RevML::Doctype->new
      unless $self->{DOCTYPE} ;

   if ( $save_doctype ) {
      $self->{COPY_MODE} = 0 ;
      $self->{DOCTYPE}->save_as_pm ;
   }

   my $files = $parsed_spec->{FILES} ;

   $self->{IN_NAME} = defined $files && length $files
      ? $files
      : '-' ;

   if ( $self->{IN_NAME} eq '-' ) {
      $self->{IN_FH} = \*STDIN ;
      ## TODO: Check IN_FH for readability when it's set to STDIN
   }
   else {
      require Symbol ;
      $self->{IN_FH} = Symbol::gensym ;
      ## Three-arg open so that odd characters in the file name can never
      ## be interpreted as an open mode.
      open( $self->{IN_FH}, '<', $self->{IN_NAME} )
         or die "$!: $self->{IN_NAME}\n" ;
   }

   $self->{WORK_FH} = Symbol::gensym ;

   return $self ;
}


## Returns TRUE when revisions are going to be copied (the normal case),
## FALSE when only saving the doctype as a .pm file.
sub dest_expected {
   my VCP::Source::revml $self = shift ;

   return $self->{COPY_MODE} ;
}


## Stashes the header passed in; it is forwarded to the destination
## later, once the first revision has been read from the input.
sub handle_header {
   my VCP::Source::revml $self = shift ;

   ## Save this off until we get our first rev from the input
   $self->{HEADER} = shift ;
   return ;
}


## Nothing to fetch here: the working files are written while the
## RevML file is being parsed.
sub get_rev {
   my VCP::Source::revml $self = shift ;

   my VCP::Rev $r ;

   ( $r ) = @_ ;
}


## Parses the whole input, has the destination sort the collected revs,
## then hands each rev to the destination in turn.
sub copy_revs {
   my VCP::Source::revml $self = shift ;

   $self->revs( VCP::Revs->new ) ;
   $self->parse_revml_file ;

   $self->dest->sort_revs( $self->revs ) ;

   my VCP::Rev $r ;
   while ( $r = $self->revs->shift ) {
      $self->get_rev( $r ) ;
      $self->dest->handle_rev( $r ) ;
   }
}


## Runs XML::Parser over the input handle.  For each element <foo> the
## methods start_foo(), foo_characters() and end_foo() are called if this
## object can() them.  Elements without a foo_characters() method have
## their character data accumulated in the element's CONTENT array, and
## completed children of <rev> and <revml> are saved in the parent's
## stack entry under the child's tag name (<label>s are collected in a
## 'labels' array).
sub parse_revml_file {
   my VCP::Source::revml $self = shift ;

   my @stack ;
   $self->{STACK} = \@stack ;

   my $char_handler = sub {
      my $expat = shift ;
      my $pelt = $stack[-1] ; ## parent element
      my $tag = $pelt->{NAME} ;
      my $content = $pelt->{CONTENT} ;
      if ( defined $content ) {
         ## Merge adjacent runs of character data in to one PCDATA node.
         if ( @$content && $content->[-1]->{TYPE} eq 'PCDATA' ) {
            $content->[-1]->{PCDATA} .= $_[0] ;
         }
         else {
            push @$content, { TYPE => 'PCDATA', PCDATA => $_[0] } ;
         }
      }
      my $sub = "${tag}_characters" ;
      $self->$sub( @_ ) if $self->can( $sub ) ;
   } ;

   my $p = XML::Parser->new(
      Handlers => {
         Start => sub {
            my $expat = shift ;
            my $tag = shift ;

            if ( $tag eq "char" ) {
               ## <char code="..."> is an escaped character: convert it
               ## and feed it to the parent as ordinary character data.
               while ( @_ ) {
                  my ( $attr, $value ) = ( shift, shift ) ;
                  #print STDERR $value, "=" ;
                  if ( $attr eq "code" ) {
                     if ( $value =~ s{^0x}{} ) {
                        $value = chr( hex( $value ) ) ;
                     }
                     else {
                        $value = chr( $value ) ;
                     }
                     #print STDERR ord $value, "\n" ;
                     $char_handler->( $expat, $value ) ;
                  }
               }
               return ;
            }

            #print STDERR "<$tag>\n" ;
            push @stack, {
               NAME    => $tag,
               ATTRS   => {@_},
               ## Only accumulate content when nobody streams it away.
               CONTENT => ! $self->can( "${tag}_characters" ) ? [] : undef,
            } ;
            my $sub = "start_$tag" ;
            $self->$sub( @_ ) if $self->can( $sub ) ;
         },

         End => sub {
            my $expat = shift ;
            my $tag = shift ;

            return if $tag eq "char" ;

            #print STDERR "</$tag>\n" ;
            my $open_tag = $stack[-1]->{NAME} ;
            die "Unexpected </$tag>, expected </$open_tag>\n"
               unless $tag eq $open_tag ;
            my $sub = "end_$tag" ;
            $self->$sub( @_ ) if $self->can( $sub ) ;
            my $elt = pop @stack ;

            if ( @stack
               && $stack[-1]->{NAME} =~ /^rev(ml)?$/
               && defined $elt->{CONTENT}
            ) {
               #print STDERR "</$tag>\n" ;
               ## Save all the meta fields for start_content() or start_diff()
               if ( $tag eq 'label' ) {
                  push @{$stack[-1]->{labels}}, $elt ;
               }
               else {
                  $stack[-1]->{$tag} = $elt ;
               }
            }
         },

         Char => $char_handler,
      },
   ) ;
   $p->parse( $self->{IN_FH} ) ;
}


## Called at each <rev>: copies the fields saved so far on the enclosing
## element in to the header and forgets any previous revision.
sub start_rev {
   my VCP::Source::revml $self = shift ;

   ## We now have all of the header info parsed, save it off

   ## TODO: Demystify this hairy wart.  Better yet, simplify all the code
   ## in this module.  It needs to decode the fields as they come in and
   ## stick them in the header and the rev_meta
   for ( map(
      $self->{STACK}->[-2]->{$_},
      grep /^[a-z_0-9]+$/, keys %{$self->{STACK}->[-2]}
   ) ) {
      $self->{HEADER}->{$_->{NAME}} = $_->{CONTENT}->[0]->{PCDATA} ;
   }

   ## Make sure no older rev is lying around to confuse us.
   $self->{REV} = undef ;
}


## RevML is constrained so that the diff and content tags are after all of
## the meta info for a revision.  And we really don't want to hold
## the entire content of a file in memory, in case it's large.  So we
## intercept start_content and start_diff and initialize the REV
## member as well as opening a place to catch all of the data that gets
## extracted from the file.
##
## Builds a VCP::Rev from the meta elements saved on the enclosing <rev>
## (the element below the top of the stack), assigns it a work path,
## creates that path's parent directory and stores the rev in REV.
sub init_rev_meta {
   my VCP::Source::revml $self = shift ;

   my $rev_elt = $self->{STACK}->[-2] ;
   my VCP::Rev $r = VCP::Rev->new() ;

   ## All revml tag names are lc, all internal data member names are uc
   #require Data::Dumper ; print Data::Dumper::Dumper( $self->{STACK} ) ;

   for ( grep /^[a-z_0-9]+$/, keys %$rev_elt ) {
      if ( $_ eq 'labels' ) {
         $r->labels(
            map $_->{CONTENT}->[0]->{PCDATA}, @{$rev_elt->{labels}}
         ) ;
      }
      else {
         ## We know that all kids *in use today* of <rev> are pure PCDATA
         ## Later, we'll need sub-attributes.
         ## TODO: Flatten the element tree by prefixing attribute names
         ## with, I dunno, say '_' or by adding '_attr' to them.
         my $out_key = $_ ;
         $r->$out_key( $rev_elt->{$_}->{CONTENT}->[0]->{PCDATA} ) ;
      }
   }
   #require Data::Dumper ; print Data::Dumper::Dumper( $r ) ;

   $r->work_path( $self->work_path( $r->name, $r->rev_id ) ) ;

   $self->mkpdir( $r->work_path ) ;

   $self->{REV} = $r ;
   return ;
}


## <delete>: the revision is a deletion, there is no file to build.
sub start_delete {
   my VCP::Source::revml $self = shift ;

   $self->init_rev_meta ;
   $self->{REV}->action( "delete" ) ;
   ## Clear the work_path so that VCP::Rev doesn't try to delete it.
   $self->{REV}->work_path( undef ) ;
}


## <move>: not supported, always dies.
sub start_move {
   my VCP::Source::revml $self = shift ;

   $self->init_rev_meta ;
   $self->{REV}->action( "move" ) ;
   ## Clear the work_path so that VCP::Rev doesn't try to delete it.
   $self->{REV}->work_path( undef ) ;
   die "<move> unsupported" ;
}


## <content>: opens the rev's work file so that content_characters() can
## stream the file body in to it.
sub start_content {
   my VCP::Source::revml $self = shift ;

   $self->init_rev_meta ;
   #require Data::Dumper ; print Data::Dumper::Dumper( $self->{REV} ) ;
   $self->{REV}->action( "edit" ) ;
   $self->{WORK_NAME} = $self->{REV}->work_path ;
   $self->{UNDECODED_CONTENT} = "" ;
   sysopen $self->{WORK_FH}, $self->{WORK_NAME}, O_WRONLY | O_CREAT | O_TRUNC
      or die "$!: $self->{WORK_NAME}" ;
   ## The binmode here is to make sure we don't convert \n to \r\n and
   ## to allow ^Z out the door (^Z is EOF on windows, and they take those
   ## things rather more seriously there than on Unix).
   binmode $self->{WORK_FH} ;
}


## Receives a chunk of character data for <content> (and, via the alias
## below, <delta>) and writes it to the work file.  Base64 data is
## buffered and decoded a complete line at a time; unencoded data is
## written straight through.  Dies on an unknown encoding or a failed
## write.
sub content_characters {
   my VCP::Source::revml $self = shift ;

   my $encoding = $self->{STACK}->[-1]->{ATTRS}->{encoding} ;

   if ( $encoding eq "base64" ) {
      $self->{UNDECODED_CONTENT} .= shift ;
      if ( $self->{UNDECODED_CONTENT} =~ s{(.*\n)}{} ) {
         ## syswrite() returns 0 (not an error) when the decoded line is
         ## empty, so test for undef rather than for truth.
         defined syswrite( $self->{WORK_FH}, decode_base64( $1 ) )
            or die "$! writing $self->{WORK_NAME}" ;
      }
   }
   elsif ( $encoding eq "none" ) {
      # print STDERR map( sprintf( " %02x=$_", ord ), $_[0] =~ m/(.)/gs ), "\n" ;
      defined syswrite( $self->{WORK_FH}, $_[0] )
         or die "$! writing $self->{WORK_NAME}" ;
   }
   else {
      die "vcp: unknown encoding '$encoding'\n" ;
   }
   return ;
}


## </content>: flushes any base64 data still buffered, closes the work
## file, sends the header to the destination if no rev has been seen
## yet, and records this rev as seen.
sub end_content {
   my VCP::Source::revml $self = shift ;

   if ( length $self->{UNDECODED_CONTENT} ) {
      defined syswrite(
         $self->{WORK_FH},
         decode_base64( $self->{UNDECODED_CONTENT} )
      ) or die "$! writing $self->{WORK_NAME}" ;
   }
   close $self->{WORK_FH} or die "$! closing $self->{WORK_NAME}" ;

   if ( $self->none_seen ) {
      #require Data::Dumper ; print Data::Dumper::Dumper( $self->{HEADER} ) ;
      $self->dest->handle_header( $self->{HEADER} ) ;
   }

   $self->seen( $self->{REV} ) ;
}


## <delta>: opens a scratch file to receive the patch text.
sub start_delta {
   my VCP::Source::revml $self = shift ;

   $self->init_rev_meta ;
   my $r = $self->{REV} ;
   $r->action( 'edit' ) ;
   $self->{WORK_NAME} = $self->work_path( $r->name, 'delta' ) ;
   sysopen $self->{WORK_FH}, $self->{WORK_NAME}, O_WRONLY | O_CREAT | O_TRUNC
      or die "$!: $self->{WORK_NAME}" ;
   ## See comment in start_content :)
   binmode $self->{WORK_FH} ;
}


## TODO: Could keep deltas in memory if they're small.
*delta_characters = \&content_characters ;
## grumble...name used once warning...grumble
*delta_characters = \&content_characters ;


## </delta>: closes the patch file and applies it to the previously seen
## revision of the same file to produce this revision's work file.  An
## empty patch means "no change", in which case the previous work file
## is hard-linked instead.  Dies if there is no previous revision to
## patch.
sub end_delta {
   my VCP::Source::revml $self = shift ;

   close $self->{WORK_FH} or die "$! closing $self->{WORK_NAME}" ;

   #print STDERR `hexdump -cx $self->{WORK_NAME}` ;

   my VCP::Rev $r = $self->{REV} ;

   ## Delay sending handle_header to dest until patch succeeds.
   my $is_first = $self->none_seen ;

   my VCP::Rev $saw = $self->seen( $r ) ;

   die "No original content to patch for ", $r->name, ",",
      " revision ", $r->rev_id
      unless defined $saw ;

   if ( -s $self->{WORK_NAME} ) {
      ## source fn, result fn, patch fn
      vcp_patch( $saw->work_path, $r->work_path, $self->{WORK_NAME} ) ;

      unless ( $ENV{VCPNODELETE} ) {
         unlink $self->{WORK_NAME}
            or warn "$! unlinking $self->{WORK_NAME}\n" ;
      }
   }
   else {
      ## TODO: Don't assume working link()
      debug "vcp: linking ", $saw->work_path, ", ", $r->work_path
         if debugging $self ;

      link $saw->work_path, $r->work_path
         or die "vcp: $! linking ", $saw->work_path, ", ", $r->work_path
   }

   if ( $is_first ) {
      #require Data::Dumper ; print Data::Dumper::Dumper( $self->{HEADER} ) ;
      $self->dest->handle_header( $self->{HEADER} ) ;
   }
}


## Convert ISO8601 UTC time to seconds since the epoch.
##
## Replaces the element's text in place.  Confesses if the text does not
## look like "YYYY-MM-DD hh:mm:ss" or does not end in 'Z'.
sub end_time {
   my VCP::Source::revml $self = shift ;

   my $timestr = $self->{STACK}->[-1]->{CONTENT}->[0]->{PCDATA} ;
   ## TODO: Get parser context here & give file, line, and column. filename
   ## and rev, while we're scheduling more work for the future.
   confess "Malformed time value $timestr\n"
      unless $timestr =~ /^\d\d\d\d(\D\d\d){5}/ ;
   ## The parens matter: without them "-1 eq 'Z'" is evaluated first and
   ## becomes substr()'s offset, so the test could never fail.
   confess "Non-UTC time value $timestr\n"
      unless substr( $timestr, -1 ) eq 'Z' ;
   my @f = split( /\D/, $timestr ) ;
   --$f[1] ; # Month of year needs to be 0..11
   ## Only year..second go to timegm(); ignore anything after them.
   $self->{STACK}->[-1]->{CONTENT}->[0]->{PCDATA} =
      timegm( reverse @f[ 0 .. 5 ] ) ;
}

# double assign => avoid used once warning
*end_mod_time = *end_mod_time = \&end_time ;


## TODO: Verify that we should be using a Base64 encoded MD5 digest,
## according to <delta>'s attributes.  Oh, and same goes for <content>'s
## encoding.

## TODO: workaround backfilling if the destination is revml, since
## it can't put the original content in place.  We'll need to flag
## some kind of special pass-through mode for that.

## </digest>: computes the base64 MD5 digest of the rev's work file and
## compares it with the digest recorded in the RevML.  On a mismatch the
## work file is hard-linked in to the temp directory for inspection and
## the method dies.  For a base rev, the destination is first asked to
## backfill the file; if it declines, the check is skipped.
sub end_digest {
   my VCP::Source::revml $self = shift ;

   $self->init_rev_meta unless defined $self->{REV} ;
   my $r = $self->{REV} ;

   my $original_digest = $self->{STACK}->[-1]->{CONTENT}->[0]->{PCDATA} ;
   my $d = Digest::MD5->new() ;

   if ( $r->is_base_rev ) {
      $self->dest->handle_header( $self->{HEADER} )
         if $self->none_seen ;

      ## Don't bother checking the digest if the destination returns
      ## FALSE, meaning that a backfill is not possible with that destination.
      ## VCP::Dest::revml does this.
      return unless $self->dest->backfill( $r ) ;
      my VCP::Rev $saw = $self->seen( $r ) ;
      warn "I've seen ", $r->name, " before" if $saw ;
   }

   my $work_path = $r->work_path ;

   sysopen F, $work_path, O_RDONLY
      or die "vcp: $! opening '$work_path' for digestion\n" ;
   ## See comment for binmode in start_content :)
   binmode F ;
   $d->addfile( \*F ) ;
   close F ;
   my $reconstituted_digest = $d->b64digest ;

   ## TODO: provide an option to turn this in to a warning
   ## TODO: make this abort writing anything to the dest, but continue
   ## processing, so as to deliver as many error messages as possible.
   unless ( $original_digest eq $reconstituted_digest ) {
      my $reject_file_name = $r->name ;
      ## Keep only letters, digits, space, '.' and '-'.  The '-' goes
      ## last in the class so it is a literal, not a range operator.
      $reject_file_name =~ s{[^A-Za-z0-9 .-]+}{-}g ;
      $reject_file_name =~ s{^-+}{}g ;
      my $reject_file_path = File::Spec->catfile(
         File::Spec->tmpdir,
         $reject_file_name
      ) ;

      link $work_path, $reject_file_path
         or die "vcp: digest check failed for ", $r->as_string,
         "\nvcp: failed to leave copy in '$reject_file_path': $!\n" ;

      die "vcp: digest check failed for ", $r->as_string,
         "\nvcp: copy left in '$reject_file_path'\n",
         "got digest: $reconstituted_digest\n",
         "expected digest: $original_digest\n" ;
   }
}


## Having this and no sub rev_characters causes the parser to accumulate
## content.
##
## </rev>: queues the finished revision (base revs are not queued) and
## releases it.
sub end_rev {
   my VCP::Source::revml $self = shift ;

   $self->revs->add( $self->{REV} )
      unless $self->{REV}->is_base_rev ;

   ## Release this rev.
   $self->{REV} = undef ;
}


=head1 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=head1 COPYRIGHT

Copyright (c) 2000, 2001, 2002 Perforce Software, Inc.
All rights reserved.

See L<VCP::License|VCP::License> (C<vcp help license>) for the terms of use.

=cut

1 ;
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 1375 | Sean McCune | Creating my own branch for work on vcp. | ||
//guest/perforce_software/revml/lib/VCP/Source/revml.pm | |||||
#9 | 1367 | Barrie Slaymaker | lots of docco updates | ||
#8 | 1358 | Barrie Slaymaker | Win32 changes | ||
#7 | 1175 | Barrie Slaymaker | Implement VCP::Patch, roll 0.26 release. | ||
#6 | 1022 | Barrie Slaymaker |
Perl "$foo\_..." => "${foo}_..." cleanup by Peter Prymmer <PPrymmer@factset.com>. |
||
#5 | 628 | Barrie Slaymaker | Cleaned up POD in bin/vcp, added BSD-style license. | ||
#4 | 480 | Barrie Slaymaker |
0.06 Wed Dec 20 23:19:15 EST 2000 - bin/vcp: Added --versions, which loads all modules and checks them for a $VERSION and print the results out. This should help with diagnosing out-of-sync modules. - Added $VERSION vars to a few modules :-). Forgot to increment any $VERSION strings. - VCP::Dest::cvs: The directory "deeply" was not being `cvs add`ed on paths like "a/deeply/nested/file", assuming "deeply" had no files in it. - VCP::Dest::revml: fixed a bug that was causing files with a lot of linefeeds to be emitted in base64 instead of deltaed. This means most text files. - Various minor cleanups of diagnostics and error messages, including exposing "Can't locate Foo.pm" when a VCP::Source or VCP::Dest module depends on a module that's not installed, as reported by Jeff Anton. |
||
#3 | 478 | Barrie Slaymaker |
0.05 Mon Dec 18 07:27:53 EST 2000 - Use `p4 labels //...@label` command as per Rober Cowham's suggestion, with the '-s' flag recommended by Christopher Siewald and Amaury.FORGEOTDARC@atsm.fr. Though it's actually something like vcp: running /usr/bin/p4 -u safari -c safari -p localhost:5666 -s files //.../NtLkly //...@compiler_a3 //.../NtLkly //...@compiler_may3 and so //on //for 50 parameters to get the speed up. I use the //.../NtLkly "file" as //a separator between the lists of files in various //revisions. Hope nobody has any files named that :-). What I should do is choose a random label that doesn't occur in the labels list, I guess. - VCP::Source::revml and VCP::Dest::revml are now binary, control code, and "hibit ASCII" (I know, that's an oxymoron) clean. The <comment>, <delta>, and <content> elements now escape anything other than tab, line feed, space, or printable chars (32 <= c <= ASCII 126) using a tag like '<char code="0x09">'. The test suite tests all this. Filenames should also be escaped this way, but I didn't get to that. - The decision whether to do deltas or encode the content in base64 is now based on how many characters would need to be escaped. - We now depend on the users' diff program to have a "-a" option to force it to diff even if the files look binary to it. I need to use Diff.pm and adapt it for use on binary data. - VCP::Dest::cvs now makes sure that no two consecutive revisions of the same file have the same mod_time. VCP::Source::p4 got so fast at pulling revisions from the repositories the test suite sets up that CVS was not noticing that files had changed. - VCP::Plugin now allows you to set a list of acceptable result codes, since we now use p4 in ways that make it return non-zero result codes. - VCP::Revs now croaks if you try to add two entries of the same VCP::Rev (ie matching filename and rev_id). - The <type> tag is now limited to "text" or "binary", and is meant to pass that level of info between foreign repositories. 
- The <p4_info> on each file now carries the one line p4 description of the file so that p4->p4 transfers can pick out the more detailed info. VCP::Source::p4, VCP::Dest::p4 do this. - VCP::{Source,Dest}::{p4,cvs} now set binaryness on added files properly, I think. For p4->p4, the native p4 type is preserved. For CVS sources, seeing the keyword substitution flag 'o' or 'b' implies binaryness, for p4, seeing a filetype like qr/u?x?binary/ or qr/x?tempobj/ or "resource" implies binaryness (to non-p4 destinations). NOTE: Seeing an 'o' or 'b' in a CVS source only ends up setting the 'b' option on the destination. That should be ok for most uses, but we can make it smarter for cvs->cvs transfers if need be. |
||
#2 | 468 | Barrie Slaymaker |
- VCP::Dest::p4 now does change number aggregation based on the comment field changing or whenever a new revision of a file with unsubmitted changes shows up on the input stream. Since revisions of files are normally sorted in time order, this should work in a number of cases. I'm sure we'll need to generalize it, perhaps with a time thresholding function. - t/90cvs.t now tests cvs->p4 replication. - VCP::Dest::p4 now doesn't try to `p4 submit` when no changes are pending. - VCP::Rev now prevents the same label from being applied twice to a revision. This was occurring because the "r_1"-style label that gets added to a target revision by VCP::Dest::p4 could duplicate a label "r_1" that happened to already be on a revision. - Added t/00rev.t, the beginnings of a test suite for VCP::Rev. - Tweaked bin/gentrevml to comment revisions with their change number instead of using a unique comment for every revision for non-p4 t/test-*-in-0.revml files. This was necessary to test cvs->p4 functionality. |
||
#1 | 467 | Barrie Slaymaker | Version 0.01, initial checkin in perforce public depot. |