xwf 1.75 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
#!/usr/bin/perl -w
#
# Parse document and report first syntax (well-formedness) error found.
#

use strict;
use XML::Parser;
use Getopt::Std;

my %opts;
getopts('e', \%opts);
my $ENTREFS = exists( $opts{'e'} );                 # flag: check ent refs

my $parser = XML::Parser->new( 
                               ErrorContext => 2,   # output error context
                               );

# get input from files
if( @ARGV ) {
    foreach( @ARGV ) {
        my $file = $_;
        unless( -r $file ) {
            print STDERR "ERROR: Can't open '$file'.\n";
            return;
        }
        my $input = '';
        open( F, $file );
        while( <F> ) { $input .= $_; }
        close F;

        # parse and report errors
        if( &parse_string( $input )) {
            print STDERR "ERROR in $file:\n$@\n";
        } else {
            print STDERR "'$file' is well-formed.\n";
        }
    }
    print "All files checked.\n";

# get input from STDIN
} else {
    my $input = "";
    while( <STDIN> ) { $input .= $_; }
    if( &parse_string( $input )) {
        print STDERR "ERROR in stream:\n$@\n";
    } else {
        print STDERR "No syntax errors found in XML stream.\n";
    }
}


# parse the string and return error message
#
# NOTE: By default, entity refs are not expanded.  XML::Parser can be
# told not to expand entity refs, but will still try to find
# replacement text just in case, which we don't want.  Therefore, we
# need to do a stupid regexp replacement, removing entities from input.
#
sub parse_string {
    my $string = shift;
    unless( $ENTREFS ) {
        $string =~ s/\&[^\s;]+;//g;         # remove entity references
    }
    eval { $parser->parse( $string ); };
    $@ =~ s/at \/.*?$//s;               # remove module line number
    return $@;
}