package VCP::Dest::revml ; =head1 NAME VCP::Dest::revml - Outputs versioned files to a revml file =head1 SYNOPSIS ## revml output class: revml:[<output-file>] revml:[<output-file>] --dtd <revml.dtd> revml:[<output-file>] --version <version> revml:[<output-file>] --sort-by=name,rev =head1 DESCRIPTION The --dtd and --version options cause the output to be checked against a particular version of revml. This does I<not> cause output to be in that version, but makes sure that output is compliant with that version. =head1 EXTERNAL METHODS =over =cut use strict ; use Carp ; use Digest::MD5 ; use Fcntl ; use Getopt::Long ; use MIME::Base64 ; use RevML::Doctype ; use RevML::Writer ; use Symbol ; use UNIVERSAL qw( isa ) ; use VCP::Rev ; use Text::Diff ; use vars qw( $VERSION $debug ) ; $VERSION = 0.1 ; $debug = 0 ; use base 'VCP::Dest' ; use fields ( 'OUT_NAME', ## The name of the output file, or '-' for stdout 'OUT_FH', ## The handle of the output file 'WRITER', ## The XML::AutoWriter instance write with ) ; =item new Creates a new instance. The only parameter is '-dtd', which overrides the default DTD found by searching for modules matching RevML::DTD:v*.pm. Attempts to create the output file if one is specified. =cut sub new { my $class = shift ; $class = ref $class || $class ; my VCP::Dest::revml $self = $class->SUPER::new( @_ ) ; my @errors ; my ( $spec, $options ) = @_ ; my $parsed_spec = $self->parse_repo_spec( $spec ) ; my $file_name = $parsed_spec->{FILES} ; $self->{OUT_NAME} = defined $file_name && length $file_name ? $file_name : '-' ; if ( $self->{OUT_NAME} eq '-' ) { $self->{OUT_FH} = \*STDOUT ; ## TODO: Check OUT_FH for writability when it's set to STDOUT } else { require Symbol ; $self->{OUT_FH} = Symbol::gensym ; ## TODO: Provide a '-f' force option open( $self->{OUT_FH}, ">$self->{OUT_NAME}" ) or die "$!: $self->{OUT_NAME}" ; } ## BUG: Can't undo this AFAIK, so we're permanently altering STDOUT ## if OUT_NAME eq '-'. binmode $self->{OUT_FH}; my $doctype ; my @sort_spec ; { local *ARGV = $options ; GetOptions( 'dtd|version' => sub { $doctype = RevML::Doctype->new( shift @$options ) ; }, "k|sort-by=s" => \@sort_spec, ) or $self->usage_and_exit ; } $self->set_sort_spec( @sort_spec ) if @sort_spec ; $doctype = RevML::Doctype->new unless $doctype ; die join( '', @errors ) if @errors ; $self->writer( RevML::Writer->new( DOCTYPE => $doctype, OUTPUT => $self->{OUT_FH}, ) ); return $self ; } sub _ISO8601(;$) { my @f = reverse( ( @_ ? gmtime( shift ) : gmtime )[0..5] ) ; $f[0] += 1900 ; $f[1] ++ ; ## Month of year needs to be 1..12 return sprintf( "%04d-%02d-%02d %02d:%02d:%02dZ", @f ) ; } sub _emit_characters { my ( $w, $buf ) = @_ ; $w->setDataMode( 0 ) ; ## Note that we don't let XML munge \r to be \n!! while ( $$buf =~ m{\G(?: ( [\x00-\x08\x0b-\x1f\x7f-\xff]) | ([^\x00-\x08\x0b-\x1f\x7f-\xff]*) )}gx ) { if ( defined $1 ) { $w->char( "", code => sprintf( "0x%02x", ord $1 ) ) ; } else { $w->characters( $2 ) ; } } } sub handle_rev { my VCP::Dest::revml $self = shift ; my VCP::Rev $r ; ( $r ) = @_ ; my $w = $self->writer ; if ( $self->none_seen ) { $w->setDataMode( 1 ) ; $w->xmlDecl ; my $h = $self->header ; ## VCP::Source::revml passes through the original date. Other sources ## don't. $w->time( defined $h->{time} ? _ISO8601 $h->{time} : _ISO8601 ) ; $w->rep_type( $h->{rep_type} ) ; $w->rep_desc( $h->{rep_desc} ) ; $w->rev_root( $h->{rev_root} ) ; } my VCP::Rev $saw = $self->seen( $r ) ; ## If there's no work path for the current file, keep the previous one. ## This is a cheat that allows us to diff against the last known version ## if a file is deleted and then re-added. Without this line, we would ## have to include the new version of the file. $self->seen( $saw ) if $saw && ! defined $r->work_path ; my $fn = $r->name ; my $is_base_rev = $r->is_base_rev ; die( "Saw '", $saw->as_string, "', but found a later base rev '" . $r->as_string, "'" ) if $saw && $is_base_rev ; $w->start_rev ; $w->name( $fn ) ; $w->type( $r->type ) ; $w->p4_info( $r->p4_info ) if defined $r->p4_info ; $w->cvs_info( $r->cvs_info ) if defined $r->cvs_info ; $w->rev_id( $r->rev_id ) ; $w->change_id( $r->change_id ) if defined $r->change_id ; $w->time( _ISO8601 $r->time ) if ! $is_base_rev || defined $r->time ; $w->mod_time( _ISO8601 $r->mod_time ) if defined $r->mod_time ; $w->user_id( $r->user_id ) if ! $is_base_rev || defined $r->time ; ## Sorted for readability & testability $w->label( $_ ) for sort $r->labels ; if ( defined $r->comment && length $r->comment ) { $w->start_comment ; my $c = $r->comment ; _emit_characters( $w, \$c ) ; $w->end_comment ; $w->setDataMode( 1 ) ; } my $convert_crs = $^O =~ /Win32/ && $r->type eq "text" ; my $digestion ; my $close_it ; my $cp = $r->work_path ; if ( $is_base_rev ) { sysopen( F, $cp, O_RDONLY ) or die "$!: $cp\n" ; binmode F ; $digestion = 1 ; $close_it = 1 ; } elsif ( $r->action eq 'delete' ) { $w->delete() ; $self->delete_seen( $r ) ; } else { sysopen( F, $cp, O_RDONLY ) or die "$!: $cp\n" ; ## need to binmode it so ^Z can pass through, need to do \r and ## \r\n -> \n conversion ourselves. binmode F ; $close_it = 1 ; my $buf ; my $read ; my $has_nul ; my $total_char_count = 0 ; my $bin_char_count = 0 ; while ( ! $has_nul ) { $read = sysread( F, $buf, 100_000 ) ; die "$! reading $cp\n" unless defined $read ; last unless $read ; $has_nul = $buf =~ tr/\x00// ; $bin_char_count += $buf =~ tr/\x00-\x08\x0b-\x1f\x7f-\xff// ; $total_char_count += length $buf ; } ; sysseek( F, 0, 0 ) or die "$! seeking on $cp\n" ; $buf = '' unless $read ; ## base64 generate 77 chars (including the newline) for every 57 chars ## of input. A '<char code="0x01" />' element is 20 chars. my $encoding = $bin_char_count * 20 > $total_char_count * 77/57 ? "base64" : "none" ; if ( ! $saw ## First rev, can't delta || ! defined $saw->work_path ## No file, can't delta || $has_nul ## patch would barf, can't delta || $encoding ne "none" ## base64, can't delta ) { ## Full content, no delta. $w->start_content( encoding => $encoding ) ; my $delete_nl ; while () { ## Odd chunk size is because base64 is most concise with ## chunk sizes a multiple of 57 bytes long. $read = sysread( F, $buf, 57_000 ) ; die "$! reading $cp\n" unless defined $read ; last unless $read ; if ( $convert_crs ) { substr( $buf, 0, 1 ) = "" if $delete_nl && substr( $buf, 0, 1 ) eq "\n" ; $delete_nl = substr( $buf, -1 ) eq "\n" ; $buf =~ s/(\r\n|\r)/\n/g ; ## ouch, that's gotta hurt. } if ( $encoding eq "none" ) { _emit_characters( $w, \$buf ) ; } else { $w->characters( encode_base64( $buf ) ) ; } } $w->end_content ; $w->setDataMode( 1 ) ; } else { ## Delta from previous version $w->base_name( $saw->name ) if $saw->name ne $r->name ; $w->base_rev_id( $saw->rev_id ) ; $w->start_delta( type => 'diff-u', encoding => 'none' ) ; my $old_cp = $saw->work_path ; die "vcp: no old work path for '", $saw->name, "'\n" unless defined $old_cp && length $old_cp ; die "vcp: old work path '$old_cp' not found for '", $saw->name, "'\n" unless -f $old_cp ; ## TODO: Include entire contents if diff is larger than the contents. ## Accumulate a bunch of output so that characters can make a ## knowledgable CDATA vs <& escaping decision. my @output ; my $outlen = 0 ; my $delete_nl ; ## TODO: Write a "minimal" diff output handler that doesn't ## emit any lines from $old_cp, since they are redundant. diff $old_cp, $cp, { ## Not passing file names, so no filename header. STYLE => "VCP::DiffFormat", OUTPUT => sub { push @output, $_[0] ; ## Assume no lines split between \r and \n because ## diff() splits based on lines, so we can just ## do a simple conversion here. $output[-1] =~ s/\r\n|\r/\n/g if $convert_crs ; $outlen += length $_[0] ; return unless $outlen > 100_000 ; _emit_characters( $w, \join "", splice @output ) ; }, } ; _emit_characters( $w, \join "", splice @output ) if $outlen ; $w->end_delta ; $w->setDataMode( 1 ) ; } ; $digestion = 1 ; } if ( $digestion ) { ## TODO: See if this should be seek or sysseek. sysseek F, 0, 0 or die "$!: $cp" ; my $d= Digest::MD5->new ; ## gotta do this by hand, since it's in binmode and we want ## to handle ^Z and lone \r's. my $delete_nl ; my $read ; my $buf ; while () { $read = sysread( F, $buf, 10_000 ) ; die "$! reading $cp\n" unless defined $read ; last unless $read ; if ( $convert_crs ) { substr( $buf, 0, 1 ) = "" if $delete_nl && substr( $buf, 0, 1 ) eq "\n" ; $delete_nl = substr( $buf, -1 ) eq "\n" ; $buf =~ s/(\r\n|\r)/\n/g ; ## ouch, that's gotta hurt. } $d->add( $buf ) ; } $d->addfile( \*F ) ; $w->digest( $d->b64digest, type => 'MD5', encoding => 'base64' ) ; } if ( $close_it ) { close F ; } $w->end_rev ; # $self->seen( $r ) ; } sub handle_footer { my VCP::Dest::revml $self = shift ; my ( $footer ) = @_ ; $self->writer->endAllTags() unless $self->none_seen ; return ; } sub writer { my VCP::Dest::revml $self = shift ; $self->{WRITER} = shift if @_ ; return $self->{WRITER} ; } =back =head1 AUTHOR Barrie Slaymaker <barries@slaysys.com> =head1 COPYRIGHT Copyright (c) 2000, 2001, 2002 Perforce Software, Inc. All rights reserved. See L<VCP::License|VCP::License> (C<vcp help license>) for the terms of use. =cut 1
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#1 | 1375 | Sean McCune | Creating my own branch for work on vcp. | ||
//guest/perforce_software/revml/lib/VCP/Dest/revml.pm | |||||
#14 | 1367 | Barrie Slaymaker | lots of docco updates | ||
#13 | 1358 | Barrie Slaymaker | Win32 changes | ||
#12 | 1174 | Barrie Slaymaker | Add and use VCP::DiffFormat | ||
#11 | 1171 | Barrie Slaymaker | Switch to using Text::Diff | ||
#10 | 1055 | Barrie Slaymaker |
add sorting, revamp test suite, misc cleanup. Dest/revml is not portable off my system yet (need to release ...::Diff) |
||
#9 | 695 | Barrie Slaymaker |
Cleaned up support for binary files in VCP::Dest::revml and altered test suite to deal with it better. Added some thoughts to the TODO file. |
||
#8 | 628 | Barrie Slaymaker | Cleaned up POD in bin/vcp, added BSD-style license. | ||
#7 | 609 | Barrie Slaymaker |
Add a file to the test procedure that it alternately added and deleted (file is named "readd"). Fixed all destinations to handle that. |
||
#6 | 608 | Barrie Slaymaker |
Lots of changes to get vcp to install better, now up to 0.066. Many thanks to Matthew Attaway for testing & suggestions. |
||
#5 | 480 | Barrie Slaymaker |
0.06 Wed Dec 20 23:19:15 EST 2000 - bin/vcp: Added --versions, which loads all modules and checks them for a $VERSION and print the results out. This should help with diagnosing out-of-sync modules. - Added $VERSION vars to a few modules :-). Forgot to increment any $VERSION strings. - VCP::Dest::cvs: The directory "deeply" was not being `cvs add`ed on paths like "a/deeply/nested/file", assuming "deeply" had no files in it. - VCP::Dest::revml: fixed a bug that was causing files with a lot of linefeeds to be emitted in base64 instead of deltaed. This means most text files. - Various minor cleanups of diagnostics and error messages, including exposing "Can't locate Foo.pm" when a VCP::Source or VCP::Dest module depends on a module that's not installed, as reported by Jeff Anton. |
||
#4 | 478 | Barrie Slaymaker |
0.05 Mon Dec 18 07:27:53 EST 2000 - Use `p4 labels //...@label` command as per Rober Cowham's suggestion, with the '-s' flag recommended by Christopher Siewald and Amaury.FORGEOTDARC@atsm.fr. Though it's actually something like vcp: running /usr/bin/p4 -u safari -c safari -p localhost:5666 -s files //.../NtLkly //...@compiler_a3 //.../NtLkly //...@compiler_may3 and so //on //for 50 parameters to get the speed up. I use the //.../NtLkly "file" as //a separator between the lists of files in various //revisions. Hope nobody has any files named that :-). What I should do is choose a random label that doesn't occur in the labels list, I guess. - VCP::Source::revml and VCP::Dest::revml are now binary, control code, and "hibit ASCII" (I know, that's an oxymoron) clean. The <comment>, <delta>, and <content> elements now escape anything other than tab, line feed, space, or printable chars (32 <= c <= ASCII 126) using a tag like '<char code="0x09">'. The test suite tests all this. Filenames should also be escaped this way, but I didn't get to that. - The decision whether to do deltas or encode the content in base64 is now based on how many characters would need to be escaped. - We now depend on the users' diff program to have a "-a" option to force it to diff even if the files look binary to it. I need to use Diff.pm and adapt it for use on binary data. - VCP::Dest::cvs now makes sure that no two consecutive revisions of the same file have the same mod_time. VCP::Source::p4 got so fast at pulling revisions from the repositories the test suite sets up that CVS was not noticing that files had changed. - VCP::Plugin now allows you to set a list of acceptable result codes, since we now use p4 in ways that make it return non-zero result codes. - VCP::Revs now croaks if you try to add two entries of the same VCP::Rev (ie matching filename and rev_id). - The <type> tag is now limited to "text" or "binary", and is meant to pass that level of info between foreign repositories. - The <p4_info> on each file now carries the one line p4 description of the file so that p4->p4 transferes can pick out the more detailed info. VCP::Source::p4, VCP::Dest::p4 do this. - VCP::{Source,Dest}::{p4,cvs} now set binaryness on added files properly, I think. For p4->p4, the native p4 type is preserved. For CVS sources, seeing the keyword substitution flag 'o' or 'b' implies binaryness, for p4, seeing a filetype like qr/u?x?binary/ or qr/x?tempobj/ or "resource" implies binaryness (to non-p4 destinations). NOTE: Seeing a 'o' or 'b' in a CVS source only ends up setting the 'b' option on the destination. That should be ok for most uses, but we can make it smarter for cvs->cvs transfers if need be. |
||
#3 | 473 | Barrie Slaymaker |
0.04 Tue Dec 12 00:15:57 EST 2000 - Reorg of VCP::Source::p4 - One large filelog command is run instead of many small ones. This takes advantage of the -m option to make sure enough changes are listed. Many extra revisions of most files are probably listed, but listing and ignoring them is quicker than spawning p4 over and over. Wish p4 filelog had a revision range... - it now doesn't suck the entire filelog output in to memory, it parses it line by line as it's emitted from the `p4 filelog` - `p4 print` is now used to print a bunch of files at once, using the header line to separate one file from the next, kind of like splitting a mime-encoded message. There's a very slight chance that it will misjudge the boundary between two files if a file happens to have a line that looks very much like the header line for the next file. This is pretty unlikely and I'll fix it if it crops up. I could batch them more, right now it never puts two revisions of the same filename in the same batch, for no really good reason. Another method might be to batch 25 or 50 revs each time. - it turns out there's a problem spawning multiple p4 commands at the same time against the same p4d (p4d is 99.2, FWIW). Or at least running large `p4 files ...` while there's a large `p4 filelog` still also running. - filelog lines beginning with "... ..." are now ignored. These are notifications of copy, branch, and integrate events that we don't yet do anything with. - deleted cur() and P4_CUR - deleted P4_IS_INCREMENTAL - Made an assertion in VCP::Dest::revml::handle_rev() a little clearer - Added some ok(1) calls to 90p4.t to make it easier to figure out which child process is whining or aborting - Made the message that's printed when a subcommand emits unexpected output say "stderr" instead of "stdout". - Cleaned up documentation for VC::Plugin::work_path(). |
||
#2 | 468 | Barrie Slaymaker |
- VCP::Dest::p4 now does change number aggregation based on the comment field changing or whenever a new revision of a file with unsubmitted changes shows up on the input stream. Since revisions of files are normally sorted in time order, this should work in a number of cases. I'm sure we'll need to generalize it, perhaps with a time thresholding function. - t/90cvs.t now tests cvs->p4 replication. - VCP::Dest::p4 now doesn't try to `p4 submit` when no changes are pending. - VCP::Rev now prevents the same label from being applied twice to a revision. This was occuring because the "r_1"-style label that gets added to a target revision by VCP::Dest::p4 could duplicate a label "r_1" that happened to already be on a revision. - Added t/00rev.t, the beginnings of a test suite for VCP::Rev. - Tweaked bin/gentrevml to comment revisions with their change number instead of using a unique comment for every revision for non-p4 t/test-*-in-0.revml files. This was necessary to test cvs->p4 functionality. |
||
#1 | 467 | Barrie Slaymaker | Version 0.01, initial checkin in perforce public depot. |