#!/usr/bin/perl -w
#
# align: Measure the fields on all lines, then copy, padding to align everything.
#
# Written ~2006-11-15 by Steven J. DeRose, sderose@acm.org.
# 2008-05-09 sjd: Fix uninit'd variables; lineends.
#
# To do:
#     Support input/output line-end variations
#     Auto-justify (test for int and float fields).
#     Option to wrap and indent beyond some width.
#     Option to associate delimiter to l or r of padding, or delete.
#     Finish decimal justification calculations.
#
# Overall flow of this part of the script:
#     1. Declare option variables and parse the command line.
#     2. Validate options and choose the box-drawing character set.
#     3. Read all input lines into memory.
#     4. First pass: split every line into fields (keeping the delimiters as
#        tokens of their own) and record the widest value seen per position.
#
use strict;
use warnings;
use Getopt::Long;

my $version         = "2008-05-09";
my $dft_delim       = "\\s\\s*";
my $dft_lineends    = "U";
my $dft_outlineends = "U";

my $altfillchar     = ".";          # Alternate fill character
my $altfillinterval = 3;            # Use altfill every how many lines?
my $autojustify     = 0;            # Set justification from apparent datatypes
my $boxdraw         = 0;            # Put in box-drawing characters?
my $boxu            = 0;            # Unicode box-drawing characters?
my $com             = "";           # Expression for what's a comment line
my $count           = 0;            # Verify that all records have same number of fields?
my $delim           = $dft_delim;   # Regex used to split into fields
my $dot             = 0;            # Use altfill and altfillinterval?
my $fillchar        = " ";          # What char to pad with
my $help            = 0;
my $howmany         = 0;            # Do only this many fields per record
my $justify         = "";           # List of ways to justify: L/R/C/.
my $last            = -1;           # Pad last field (-1 = default, see help)
my $left            = 1;            # Left-align fields instead of right?
my $lineends        = $dft_lineends;
my $outlineends     = $dft_outlineends;
my $quiet           = 0;
my $strip           = 0;
my $tabs            = 0;            # Don't expand tabs
my $trim            = 0;            # Trim whitespace off of field values?
my $verbose         = 0;

# Process options
Getopt::Long::Configure ("ignore_case");
my $result = GetOptions(
    "autojustify"               => \$autojustify,
    "boxdraw"                   => \$boxdraw,
    "boxu"                      => \$boxu,
    "c|fill=s"                  => \$fillchar,
    "com=s"                     => \$com,
    "count!"                    => \$count,
    "d=s"                       => \$delim,
    "dot"                       => \$dot,
    "h|help|?"                  => \$help,
    "j|justify=s"               => \$justify,
    "l|left!"                   => \$left,
    "last"                      => \$last,
    "lineends|linends=s"        => \$lineends,
    "n=n"                       => \$howmany,
    "outlineends|outlinends=s"  => \$outlineends,
    "q|quiet!"                  => \$quiet,
    "r|right"                   => sub { $left = 0; },
    "strip!"                    => \$strip,
    "t"                         => \$trim,
    "tabs=n"                    => \$tabs,
    "v|verbose+"                => \$verbose,
    "version"                   => sub {
        die "Version of $version, by Steven J. DeRose, sderose\@acm.org.\n";
    }
);

# Help is honored before the parse result is checked.
if ($help) {
    showUsage();
    exit;
}
($result) || die "Bad options.\n";

# Map the line-end option letters to actual line-end strings. Anything
# containing "M" selects CR, anything containing "D" selects CRLF (and wins
# if both appear); otherwise LF is used.
my $lineendChar = "\n";
if ($lineends =~ m/M/i) { $lineendChar = "\r"; }
if ($lineends =~ m/D/i) { $lineendChar = "\r\n"; }

my $outlineendChar = "\n";
if ($outlineends =~ m/M/i) { $outlineendChar = "\r"; }
if ($outlineends =~ m/D/i) { $outlineendChar = "\r\n"; }

# Validate and default options, and set up box-drawing charset
($howmany < 0) && die "-n value must be positive.\n";
($tabs < 0)    && die "Invalid tab interval $tabs.\n";

# The justification spec is one letter per field; whitespace between the
# letters is discarded by the split.
my @justifyList = ();
if ($justify ne "") {
    @justifyList = split(/\s*/, uc($justify));
    for my $j (@justifyList) {
        ($j =~ m/^[LCR.]/) || die "Bad justification setting '$j'.\n";
    }
}

(length($fillchar) == 1) ||
    die "fill character must be a single character, not '$fillchar'.\n";
# Fixed: this message used to report $fillchar instead of the offending value.
(length($altfillchar) == 1) ||
    die "alternate fill character must be a single character, not '$altfillchar'.\n";

# Unicode boxdrawing chars? Block 0x2500-0x257F:
#     Code     Name = "Box drawings light..."
#     0x2500   horizontal (-)
#     0x2502   vertical (|)
#     0x250c   down/right (upper-left corner)
#     0x2510   down/left (upper-right corner)
#     0x2514   up/right (lower-left corner)
#     0x2518   up/left (lower-right corner)
#     0x251c   vertical/right (left end)
#     0x2524   vertical/left (right end)
#     0x252c   down/horizontal (top end)
#     0x2534   up/horizontal (bottom end)
#     0x253c   vertical/horizontal (cross)
#
#     0x256d   arc down/right (upper-left round corner)
#     0x256e   arc down/left (upper-right round corner)
#     0x256f   arc up/left (lower-right round corner)
#     0x2570   arc up/right (lower-left round corner)
#     0x2571   diagonal upper-right/lower-left (slash)
#     0x2572   diagonal upper-left/lower-right (backslash)
#     0x2573   diagonal cross (x)
#     0x2574   left
#     0x2575   up
#     0x2576   right
#     0x2577   down
my $boxpad      = " ";      # Space after column-sep ("|")
my $boxhoriz    = "-";
my $boxvert     = "|";
my $boxulcorner = "/";
my $boxurcorner = "\\";
my $boxllcorner = "\\";
my $boxlrcorner = "/";
my $boxlend     = "|";
my $boxrend     = "|";
my $boxtend     = "-";
my $boxbend     = "-";
my $boxcross    = "+";

# --boxu implies --boxdraw and swaps the ASCII set for the Unicode one.
if ($boxu) {
    $boxdraw     = 1;
    $boxhoriz    = "─";
    $boxvert     = "│";
    $boxulcorner = "┌";
    $boxurcorner = "┐";
    $boxllcorner = "└";
    $boxlrcorner = "┘";
    $boxlend     = "├";
    $boxrend     = "┤";
    $boxtend     = "┬";
    $boxbend     = "┴";
    $boxcross    = "┼";     # 0x253c; was a mixed-weight tee, not the cross
}

# Boxes force padding of the last field; otherwise, when the user did not say,
# the last field is padded only for right-justified output.
if ($boxdraw) { $last = 1; }
if ($last == -1) { $last = ($left) ? 0 : 1; }

################################################################################
# Load all of stdin (which limits filesize, but enables doing both passes)
my @lines = <>;
($verbose) && warn "align: Total lines: " . scalar(@lines) . ".\n";

# Expand tabs, or we may count wrong.
# The "1==0" term keeps this branch from ever running, so tab expansion is
# currently switched off regardless of the -tabs value.
if ($tabs > 0 && 1==0) {
    for (my $lnum = 0; $lnum < scalar @lines; $lnum++) {
        $lines[$lnum] = expandTabs($lines[$lnum], $tabs);
    }
}

# Find the widest instance of each field
my @maxes      = ();    # Widest token seen at each token position
my @preDecMax  = ();    # For "." fields: most characters before the decimal point
my @postDecMax = ();    # For "." fields: most characters after the decimal point
my @fieldtype  = ();
my $delimHasS            = ($delim =~ m/\\s/) ? 1 : 0;
my $sawEmptyTabbedFields = 0;
my $priorTokenCount      = -1;
my $recNum               = 0;

# $l aliases the element of @lines, so the chomp/strip below also change the
# stored lines.
foreach my $l (@lines) {
    $recNum++;
    chomp($l);
    if ($strip) {
        $l =~ s/^\s*//;
        $l =~ s/\s*$//;
    }
    if ($com ne "" && $l =~ m/$com/) { next; }
    if ($delimHasS && $l =~ m/\t\t/) { $sawEmptyTabbedFields++; }

    my @tokens = split("\($delim\)", $l);    # we keep the delims, too

    # Chop off extra fields if user asked
    if ($howmany > 0 && scalar @tokens > $howmany) {
        my @remainder = splice @tokens, $howmany+1;
        $tokens[$howmany] = join("", @remainder);
        ($verbose) && warn " align: Unbroken: $tokens[$howmany]\n";
    }
    ($verbose) && warn "align: Tokenized: |" . join("|", @tokens) . "|\n";

    # Verify that we have consistent number of fields
    if ($count) {
        my $curTokenCount = scalar(@tokens);
        if ($priorTokenCount == -1) {
            $priorTokenCount = $curTokenCount;
        }
        elsif ($curTokenCount != $priorTokenCount) {
            warn "align: Record $recNum has different number of delims+fields"
                . "($curTokenCount)"
                . " than prior record(s) had ($priorTokenCount):\n"
                . "'" . showInvisibles($l) . "'\n";
            die "It parsed into:\n '" . join("'\n '", @tokens) . "'\n";
        }
    }

    # NOTE(review): the header of this loop was damaged in the copy of the
    # file under review; it is reconstructed here as a walk over @tokens with
    # first-use initialization of @maxes. Confirm against the original.
    for (my $i = 0; $i < scalar(@tokens); $i++) {
        if ($i >= scalar(@maxes)) { $maxes[$i] = 0; }

        if ($trim) {
            $tokens[$i] =~ s/^\s*//;
            $tokens[$i] =~ s/\s*$//;
        }

        my $len = length($tokens[$i]);
        if ($len > $maxes[$i]) { $maxes[$i] = $len; }

        # Decimal-justified fields also track the widest part on each side
        # of the first "." (a value with no "." is all "before").
        if (defined($justifyList[$i]) && $justifyList[$i] =~ m/^\./) {
            my $dpos = index($tokens[$i], ".");
            my $preDecLength  = ($dpos >= 0) ? $dpos : length($tokens[$i]);
            my $postDecLength = ($dpos >= 0) ? (length($tokens[$i])-$dpos-1) : 0;

            # The defined() tests avoid comparing against a not-yet-set slot,
            # which warned about uninitialized values under -w.
            if (!defined($preDecMax[$i]) || $preDecLength > $preDecMax[$i]) {
                $preDecMax[$i] = $preDecLength;
            }
            if (!defined($postDecMax[$i]) || $postDecLength > $postDecMax[$i]) {
                $postDecMax[$i] = $postDecLength;
            }
        }
    } # for $i
} # for $l

# Set the max field length for decimal-justified fields to maxpre+maxpost+1.
for (my $i=0; $i -1) { warn "align: mid-line newline found\n"; } my @tokens = split("\($delim\)", $l); # we keep the delims, too if ($howmany > 0 && scalar @tokens > $howmany) { my @remainder = splice @tokens, $howmany+1; $tokens[$howmany] = join("",@remainder); } my $out = ""; for (my $i=0; $i= 0) ? (length($t)-$dpos-1) : 0; my $postDecNeeded = $postDecMax[$i] = $postDecLength; if ($postDecNeeded > 0) { $tpad .= ($curfillchar x $postDecNeeded); } $needed = $maxes[$i] = length($t); if ($needed > 0) { $tpad = ($curfillchar x $needed) . $t; } } else { # Right-justified $tpad = ($curfillchar x $needed) . $t; } } else { # default to R justified $tpad = ($curfillchar x $needed) . $t; } $out .= $tpad . $boxpad; } if (!$last) { $out =~ s/$curfillchar+$//; } if ($boxdraw) { print "$out$boxvert\n"; print $rowsepLine; } else { print "$out\n"; } } ($boxdraw) && print $bottomLine; my $rct = scalar @lines; ($quiet) || warn "align: Done, $rct records.\n"; exit; ################################################################################ sub expandTabs { my $l = $_[0]; my $interval = $_[1]; my $buf = ""; for (my $i=0; $i