#!/usr/bin/perl

use strict;
use warnings;

use Date::Calc qw( check_date Date_to_Days );
use FileHandle;
use Getopt::Std;
use Scalar::Util qw( looks_like_number );

# verify_coord_data.pl - script to verify Geohash coordinate data
#
# Reads records of the form "YYYY-MM-DD,<DOW>,<LAT>,<LON>" (one per line)
# from a file or from standard input and reports, on standard output:
#   * empty lines
#   * dates that are malformed or do not exist
#   * dates that do not directly follow the previous record's date
#   * DOW values that are not positive numbers
#   * LAT / LON values that are not strictly between 0 and 1
#
# Exit status: the number of errors detected, capped at 255 so that a large
# error count can never wrap around to 0 ("success").  Usage errors exit
# with status 2.

# globals
our ( %opt, $VERBOSE );
$VERBOSE = 0;

# usage( [@message] )
# Print the optional message and the usage text to standard error, then
# exit with status 2.  Never returns.
sub usage {
    if ( @_ ) {
        print STDERR join( " ", @_ ), "\n";
    }

    print STDERR <<END_OF_USAGE;
Usage: $0 [-h] [-i <input file>] [-v]

  -h: show this usage text
  -i: input file to use (file format: "YYYY-MM-DD,<DOW>,<LAT>,<LON>")
      default: '-' for standard input
  -v: enable verbose logging, sent to standard error

Example:
  * Verify the allcoords file in one inline command:
    \$ wget -qO - http://www.amipsychic.net/coords/allcoords.csv.bz2 | bzip2 -d | ./verify_coord_data.pl

Notes:
  * You can get Geohash data from: http://www.amipsychic.net/geohashing.html
  * This script uses Date::Calc from CPAN; you can install it via:
    \$ cpan -i Date::Calc
END_OF_USAGE

    exit 2;
}

# logit( @words )
# Print the space-joined words, followed by a newline, to standard output.
sub logit {
    print STDOUT join( " ", @_ ), "\n";
    return;
}

# debuglog( @words )
# Like logit(), but writes to standard error and only when verbose mode
# (-v) is enabled.
sub debuglog {
    if ( $VERBOSE ) {
        print STDERR join( " ", @_ ), "\n";
    }
    return;
}

# parse_args()
# Parse the command line into %opt, apply defaults and validate the input
# file.  Calls usage() (which exits) on any problem.
sub parse_args {
    getopts( "hi:v", \%opt ) or usage( "error: invalid arguments" );

    # Turn verbose mode on first; otherwise the debug messages below could
    # never be printed.
    $VERBOSE = 1 if $opt{v};

    debuglog( "validating parameters..." );

    usage() if $opt{h};

    # Test for defined/non-empty rather than truth so that a file that is
    # literally named "0" is still accepted.
    if ( ! defined $opt{i} || $opt{i} eq "" ) {
        $opt{i} = "-";
    }

    if ( $opt{i} ne "-" && ! -r $opt{i} ) {
        usage( "error: cannot read input file [$opt{i}]" );
    }

    debuglog( "resulting parameters:" );
    debuglog( "opt-i: $opt{i}" );
    debuglog( "opt-v: " . ( defined $opt{v} ? $opt{v} : "undef" ) );
    return;
}

# _is_unit_fraction( $value )
# True when $value is a number strictly between 0 and 1.
sub _is_unit_fraction {
    my ( $value ) = @_;
    return looks_like_number( $value ) && $value > 0 && $value < 1;
}

# verify_input()
# Check every record of the input selected by $opt{i} ("-" means standard
# input) and log each problem found via logit().  Dies if the input file
# cannot be opened.  Returns the number of errors detected.
sub verify_input {
    my $src_name = $opt{i} eq "-" ? "<standard input>" : "[$opt{i}]";
    debuglog( "src_name = $src_name" );

    my $input;
    if ( $opt{i} eq "-" ) {
        $input = \*STDIN;
    }
    else {
        # Three-argument open: the file name is never interpreted as an
        # open mode or as a command to pipe from.
        open $input, '<', $opt{i}
            or die "cannot open $src_name: $!\n";
    }

    logit( "verifying Geohash data from $src_name..." );

    my $lineno   = 0;
    my $numerr   = 0;
    my $prevdate = "";
    my $prevdays;    # day number of the previous record; undef if none or invalid

    while ( my $line = <$input> ) {
        # Count the line before any "next" so that reported line numbers
        # stay correct after an empty line.
        $lineno++;
        chomp $line;
        $line =~ s/\r\z//;    # tolerate CRLF line endings

        debuglog( "line $lineno: $line" );
        my $errors_before = $numerr;

        if ( $line eq "" ) {
            logit( ++$numerr . ": empty line after $prevdate (line $lineno)" );
            next;
        }

        # Missing fields become empty strings, so each one is reported by
        # its own check below instead of triggering "uninitialized" warnings.
        my ( $date, $dow, $lat, $lon ) =
            map { defined $_ ? $_ : "" } ( split /,/, $line, -1 )[ 0 .. 3 ];
        debuglog( "parsed: [$date] [$dow] [$lat] [$lon]" );

        # Only hand purely numeric components to Date::Calc.
        my ( $year, $month, $day ) = $date =~ /\A(\d+)-(\d+)-(\d+)\z/;
        my $days;    # stays undef unless the date is valid
        if ( defined $year && check_date( $year, $month, $day ) ) {
            $days = Date_to_Days( $year, $month, $day );
            debuglog( "date is valid" );
        }
        else {
            logit( ++$numerr . ": bad date [$date] (line $lineno)" );
        }

        # The sequence check only makes sense (and Date_to_Days is only
        # called) when both this date and the previous one are valid.
        if ( defined $days && defined $prevdays ) {
            if ( $days != $prevdays + 1 ) {
                logit( ++$numerr . ": $date skipped days since $prevdate (line $lineno)" );
            }
            else {
                debuglog( "date is sequential since previous date" );
            }
        }

        # NOTE(review): DOW is only required to be a positive number;
        # presumably it is the Dow Jones opening value used by the
        # Geohashing algorithm rather than a day of the week - confirm.
        if ( looks_like_number( $dow ) && $dow > 0 ) {
            debuglog( "dow is sane" );
        }
        else {
            logit( ++$numerr . ": $date dow [$dow] invalid (line $lineno)" );
        }

        if ( _is_unit_fraction( $lat ) ) {
            debuglog( "lat is sane" );
        }
        else {
            logit( ++$numerr . ": $date lat [$lat] invalid (line $lineno)" );
        }

        if ( _is_unit_fraction( $lon ) ) {
            debuglog( "lon is sane" );
        }
        else {
            logit( ++$numerr . ": $date lon [$lon] invalid (line $lineno)" );
        }

        debuglog( "line $lineno is OK" ) if $numerr == $errors_before;

        $prevdate = $date;
        $prevdays = $days;
    }

    if ( $opt{i} ne "-" && ! close $input ) {
        warn "trouble closing file: $!\n";
    }

    logit( "$lineno lines analyzed; $numerr errors detected" );

    return $numerr;
}

parse_args();

# Exit statuses are truncated to 8 bits by the operating system; cap the
# value so that e.g. 256 errors cannot be reported as success.
my $errors = verify_input();
exit( $errors > 255 ? 255 : $errors );
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#3 | 7563 | Marc Wensauer | Update error reporting; provide example in usage | ||
#2 | 7311 | Marc Wensauer | Fleshed out examples in usage section of generator script. Corrected minor bug in verify script. | ||
#1 | 7307 | Marc Wensauer | Adding script to verify Geohash coordinate data |