#!/usr/bin/perl -w
#
# taglist: Make a ranklist of tags from an XML file.
#
# Approach (no real XML parser is used):
#     Split before each start-tag/pi/comment/dcl
#     Ditch all lines that aren't one of those
#     Get rid of everything after the name
#
# ~2006-06-07: Written by Steven J. DeRose, sderoses@acm.org.
# 2007-08-15 sjd: Add -v, kill dcls and end-tags and comments.
# 2007-09-19 sjd: $version. Add -v, -alpha, -norank.
#
use strict;
use warnings;
use Getopt::Long;

my $version = "2010-09-12";

my $alpha   = 0;    # -alpha:  sort by tag name instead of by frequency
my $norank  = 0;    # -norank: print names only (documented as implying -alpha)
my $quiet   = 0;    # -q:      accepted; nothing in this script reads it
my $verbose = 0;    # -v:      may be repeated; any non-zero value adds a summary

# Process options
#
Getopt::Long::Configure("ignore_case");
my $result = GetOptions(
    "alpha!"     => \$alpha,
    # Show this script's own POD. The list form of system() keeps the
    # script path away from the shell.
    "h|help|?"   => sub { system("perldoc", $0); exit; },
    "q|quiet!"   => \$quiet,
    "norank"     => \$norank,
    "v|verbose+" => \$verbose,
    "version"    => sub {
        die "Version of $version, by Steven J. DeRose.\n";
    },
);
($result) || die "taglist: Bad options.\n";


###############################################################################
# Validate the input files.
#
# Every named file is checked (not only the first), and the no-argument case
# is handled without interpolating an undefined value into the message.
#
my ($bad_file) = grep { !-f $_ } @ARGV;
if (!@ARGV || defined $bad_file) {
    my $shown = defined $bad_file ? $bad_file : '';
    warn "Can't find file '$shown'\n";
    showUsage();
    exit;
}


###############################################################################
# Count the element type names.
#
# NOTE(review): the copy of this script under review built a shell pipeline
# ("cat $files | sed ... ") whose sed expressions had lost their angle-bracket
# content and whose tail was missing. The loop below re-implements the steps
# listed in the header comment directly in Perl, which also avoids passing
# file names through the shell. Confirm the results against a known-good copy.
#
my %count_of;
for my $file (@ARGV) {
    open(my $fh, '<', $file)
        or die "taglist: Can't open '$file': $!\n";
    while (my $line = <$fh>) {
        # Remove (single-line) comments
        $line =~ s/<!--.*?-->//g;

        # Only '<' followed directly by a name-start character is counted,
        # so end-tags ('</'), PIs ('<?') and declarations ('<!') drop out.
        # Everything after the name (attributes, '/', '>') is discarded.
        while ($line =~ m/<([A-Za-z_:\x80-\xFF][^\s\/>]*)/g) {
            $count_of{$1}++;
        }
    }
    close($fh);
}

if ($verbose) {
    warn sprintf(
        "taglist: %d file(s) read, %d distinct element type(s) found.\n",
        scalar(@ARGV), scalar(keys %count_of)
    );
}


###############################################################################
# Report.
#
# NOTE(review): the layout of each output line presumably came from a
# trailing "sort | uniq -c | sort -rn" in the lost part of the pipeline; the
# "%7d name" layout below imitates `uniq -c` on that assumption -- confirm.
#
my @names = sort keys %count_of;
if (!$alpha && !$norank) {
    # Most frequent first; ties stay in alphabetical order.
    @names = sort { $count_of{$b} <=> $count_of{$a} || $a cmp $b } @names;
}

for my $name (@names) {
    if ($norank) {
        print "$name\n";
    }
    else {
        printf("%7d %s\n", $count_of{$name}, $name);
    }
}

exit;


###############################################################################
# showUsage: print the help text to STDOUT.
#
# The text is POD held in a string literal, so the same text is what the -h
# handler's perldoc call renders.
#
# NOTE(review): the opening of this text (everything before "Show this help
# and exit.") was missing from the reviewed copy and has been reconstructed
# from the option table above -- confirm the wording.
#
sub showUsage {
    print "
=head1 Usage

taglist [options] file(s)

Make a ranklist of tags from an XML file.

=head1 Options

=over

=item * B<-h>

Show this help and exit.

=item * B<-alpha>

Sort alphabetically by tag, not by frequency.

=item * B<-norank>

Don't show tag frequencies (implies -alpha).

=item * B<-v>

Show more detailed messages.

=item * B<-version>

Show version info and exit.

=back

=head1 Known bugs and limitations

Does not use an actual parser, so can be misled by tags buried inside
multi-line comments, inside PIs, external entities that are not listed
on the command line, etc.

=head1 Related commands

xmlstats: Calculates lots of statistics on an XML file, including the
frequency list done here. 'taglist' is just quicker, and does not require
that the file(s) be valid or even well-formed.

=head1 Ownership

This work by Steven J. DeRose is licensed under a Creative Commons
Attribution-Share Alike 3.0 Unported License.
For further information on this license, see
http://creativecommons.org/licenses/by-sa/3.0/.

The author's present email is sderose at acm.org.

For the most recent version, see http://www.derose.net/steve/utilities/.

=cut
";
}