#!/usr/bin/perl -w # # dumpx: A better hex-dump utility, at least for 'text'. # # Why? # od's interface. It's hard to get it to display based on characters, # and it gets byte order wrong in some settings. It can't align X and A, # and it can't produce a legible ASCII text. You can't easily go to whatever # offset you want (except at the start). This layout is much more like the # old CMS hex dump program (probably was a local mod at Brown University # by Peter DiCamillo; at any rate it was good). # # History: # Long ago (~1980?): Written in C by Steven J. DeRose. # 2006-06-03: Ported to Perl and made options more like Unix od. # 2006-09-12: Make -x synonym for -h. Implement $linesToShow. # 2006-09-13: Support stdin when no file argument. # 2006-10-02: Make color options like ls/grep. Split up help. # 2006-11-03: Add -cc. # 2006-11-07: Add -color controls. # 2007-01-11 sjd: Add -jl, -lineend. # 2007-02-15 sjd: Add -linenum. # 2007-08-29 sjd: Improve -listchars. Add -codepage. # 2007-09-05 sjd: Add unicode classes. # 2007-11-27 sjd: Start strict. # 2008-02-13 sjd: Add perl -w, fix a few bugs, esp. with colorizing. # 2008-08-31 sjd: Check for BSD. # # Todo: # Only escape once for several characters in same color. # Finish interactive mode. Ditch 'header', add 'lines' to options. # Unicode by chars not bytes - Sniff utf-8, BOM. # Finish non-Latin1 charset support. # To display text in other encodings, pipe -a portion through iconv? # Option to show Unicode names/block for chars? Or color by class? # my $version = "0.99, 2008-08-31"; # Stuff needed for validating options my $codepages = # See www.microsoft.com/globaldev/reference/WinCP.mspx "1252"; my $unicodeClassExpr = getUnicodeClassExpr(); # Options my $alphaSideGutter = 0; # Split alpha columns at middle my $blankLine = 0; # Put in an extra blank line my $codepage = ""; my $cwidth = 3; # Width to allow for each character (including gutter) my $G1ok = 1; # Ok to print G1 (Latin-1) characters? my $linesToShow = ""; # List of line types to display, in order specified my $listChars = 0; # Just show Latin-1 table my $locOption = 0; # Where to start from my $maxBytes = 0; # 0 means no limit my $newlineDisplayChar = chr(0xB6); # Pilcrow my $nlines = 16; # Lines in display block my $offsetBase = 0; # What base to use to show file offsets # (default: use same base as line's data) my $quiet = 0; my $recordSep = ""; # Incoming line-end type? my $startLine = 0; # If non-zero, skip to this line num. my $user = 0; # Run in interactive mode? my $verbose = 0; # Don't leave out duplicate lines my $width = 16; # Bytes expanded per display line # Options for what forms to display chars in my $showAlpha = 1; my $showHead = 0; # Show column header w/ low-order nibbles my $showHex = 1; # default my $showOctal = 0; my $showDecimal = 0; my $showAlphaBelow = 0; my $showAlphaSide = 1; my $showDuplicates = 1; my $showCStyle = 0; my $showLineNumber = 0; # Color options (note: I alias dumpx to dumpx --color=auto) my $colorNL = 0; # Try to display newlines in red? if ($ENV{USE_COLOR} && ($ENV{USE_COLOR} eq 'auto')) { $colorNL = 1; } my $colorXML = 0; # Color angle brackets and ampersands my $colorCONTROLS = 0; # Color chars 00-31 and 128-159 my $colorNonASCII = 0; # Color chars >127 my $cc = ""; # chars to color my $colorClass = ""; # Perl name for Unicode color class my $colorClassNegated = ""; my $logfilename = ""; my $LOG = 0; ################################################################################ # Process options while ($ARGV[0]) { if (index($ARGV[0],"--")==0) { $ARGV[0] = substr($ARGV[0],1); } if ($ARGV[0] eq "-user") { $user = 1; } # od-like main options elsif ($ARGV[0] eq "-A") { shift; if ($ARGV[0] == 8 or $ARGV[0] == 10 or $ARGV[0] == 16) { $offsetBase = $ARGV[0]; } else { die "Unsupported -A value '$ARGV[0]' (must be 8, 10, or 16.\n"; } } elsif ($ARGV[0] eq "-j") { shift; $locOption = $ARGV[0]; } elsif ($ARGV[0] eq "-jl") { shift; $startLine = $ARGV[0]; } elsif ($ARGV[0] eq "-linenum") { $showLineNumber = 1; } elsif ($ARGV[0] eq "-N") { shift; $maxBytes = $ARGV[0]; if ($maxBytes<=0) { die "-N value too small.\n"; } } elsif ($ARGV[0] eq "-v") { $showDuplicates = 0; } elsif ($ARGV[0] eq "-V") { $verbose = 1; } elsif ($ARGV[0] eq "-w") { shift; $width = $ARGV[0]; ($width >= 8) || die "-w bytes per output line must be >= 8.\n"; } elsif ($ARGV[0] eq "-version") { warn "Dumpx $version, by Steven J. DeRose, sderose\@acm.org.\n"; exit; } # od-like format options elsif ($ARGV[0] eq "-a") { $showAlphaBelow = 1; $linesToShow .= " a"; } elsif ($ARGV[0] eq "-c") { $showCStyle = 1; $linesToShow .= " c"; } elsif ($ARGV[0] eq "-d") { $showDecimal = 1; $linesToShow .= " d"; } elsif ($ARGV[0] eq "-h" || $ARGV[0] eq "-x") { $showHex = 1; $linesToShow .= " h"; if ($ARGV[0] eq "-h" && !$quiet) { warn "(did you mean '-help'? '-h' is for hexadecimal display)\n"; } } elsif ($ARGV[0] eq "-o") { $showOctal = 1; $linesToShow .= " o"; } # non-od-like options elsif ($ARGV[0] eq "-as") { $showAlphaSide = 1; } elsif ($ARGV[0] eq "-ag") { $alphaSideGutter = 1; } elsif ($ARGV[0] eq "-blank") { $blankLine = 1; } elsif ($ARGV[0] eq "-nocolor") { $colorNL = 0; $colorXML = 0; } elsif (substr($ARGV[0],0,6) eq "-color") { my $eq = index($ARGV[0],"="); my $carg; if (($eq > -1) && ($eq < length($ARGV[0])-1)) { $carg = substr("$ARGV[0]",$eq+1); } else { shift; $carg = $ARGV[0]; } if ($carg eq "off") { $colorNL = 0; $colorXML = 0; } elsif ($carg eq "auto") { $colorNL = 1; $colorXML = 0; } elsif ($carg eq "xml") { $colorNL = 0; $colorXML = 1; } elsif ($carg eq "controls") { $colorNL = 0; $colorCONTROLS = 1; } elsif ($carg eq "nonascii") { $colorNL = 0; $colorNonASCII = 1; } elsif ($carg eq "all") { $colorNL=$colorXML=$colorCONTROLS = 1;} elsif ($carg =~ m/^($unicodeClassExpr)$/) { $colorClass = $carg; } elsif ($carg =~ m/^!($unicodeClassExpr)$/) { $colorClassNegated = $carg; } else { warn "Unknown -color argument '$carg'. Known ones:\n"; (my $msg = $unicodeClassExpr) =~ s/\|/, /g; warn "off, auto, xml, controls, all, and any of these with or" . " without '!' on the front:\n$msg.\n"; exit; } } elsif ($ARGV[0] eq "-cc") { shift; $cc .= makeChar($ARGV[0]); print "Colorizing character '$ARGV[0]' (d'" . ord($cc) . "').\n"; } elsif ($ARGV[0] eq "-codepage") { shift; $codepage = $ARGV[0]; (" $codepages " =~ " $codepage ") || die "Unknown code page '$codepage'.\n"; } elsif ($ARGV[0] eq "-cwidth" || $ARGV[0] eq "-cw") { shift; $cwidth = $ARGV[0]; ($cwidth >= 2) || die "-cwidth to display per character must be >= 2.\n"; } elsif ($ARGV[0] eq "-head") { $showHead = 1; } elsif ($ARGV[0] eq "-lineend") { shift; my $ltype = substr("$ARGV[0] ",0,1); if ($ltype eq "m") { $recordSep = chr(13); } elsif ($ltype eq "u") { $recordSep = chr(10); } elsif ($ltype eq "d") { $recordSep = chr(13) . char(10); } else { die "Unknown -lineend type, must be m or u or d, not '$ltype'\n"; } } elsif ($ARGV[0] eq "-listchars") { $listChars = 1; $blankLine = 1; $cwidth = 4; } elsif ($ARGV[0] eq "-noas") { $showAlphaSide = 0; } elsif ($ARGV[0] eq "-nohex") { $showHex = 0; } elsif ($ARGV[0] eq "-nog1") { $G1ok = 0; $newlineDisplayChar = "."; } elsif ($ARGV[0] eq "-help-user") { helpUser(); exit; } elsif (substr($ARGV[0],0,1) eq "-") { ($ARGV[0] eq '-h' or $ARGV[0] eq '-help') || print "Unknown option '$ARGV[0]'. Usage:\n"; doHelp(); exit; } else { last; } shift; } # options ################################################################################ # General setup my $loc = $locOption; # Current offset into file my $lastLine = ""; # To avoid duplicates... my $lineNumber = 0; # What line are we at? my $bytesDumpedSoFar = 0; # To support -N option ($maxBytes) # Create an array of short names for the C0 and C1 control characters # PAD, HOP, and SGCI are listed as "XXX" in Unicode (acc. Wikipedia). my @C0names = ( "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", " BS", " HT", " LF", " VT", " FF", " CR", " SO", " SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN", " EM", "SUB", "ESC", " FS", " GS", " RS", " US"); my @C1names = ( "PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA", "HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3", "DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA", "SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC", "NBS"); if (scalar @C0names != 32 || scalar @C1names != 33) { warn "\nname tables broken.\n\n"; } # Create color escapes my $esc = chr(27); my $cRed = "$esc\[1;31m"; my $cGreen = "$esc\[1;32m"; my $cYellow = "$esc\[1;33m"; my $cBlue = "$esc\[1;34m"; my $cMagenta = "$esc\[1;35m"; my $cCyan = "$esc\[1;36m"; my $cEnd = "$esc\[0;39m"; my $oldColor = "$cEnd"; # current color state: default (see colorChars()). my $newColor = ""; # General state/globals my $needAlphaSide = 0; # global flag: show chars on right? my $alreadyDidStars = 0; my $ofn = 0; # file name for interactive log my $in; # global input buffer # State for interactive mode my $startLoc = 0; my $MAXTOKENLENGTH = 32; my $MAXMACRODEPTH = 10; my @mstack = (); my $mdepth = 0; my $prevLoc = 0; # Previous offset my $lastTarget = ""; ################################################################################# if ($listChars){ # Make fake input data, call usual display code. print "Latin-1 chart:\n"; my $s = ""; for (my $i=1; $i<=255; $i++) { $s .= chr($i); } ($verbose) && print "Got string: '$s'.\n"; if (!$linesToShow) { $linesToShow .= "h a"; } ($verbose) && print "Lines to show: '$linesToShow'.\n"; $loc = 0; while ($s ne "") { $in = substr($s,0,$width); foreach my $typ (split(/ +/,$linesToShow)) { if ($typ eq "h") {showHex(); } elsif ($typ eq "d") {showDecimal(); } elsif ($typ eq "o") {showOctal(); } elsif ($typ eq "c") {showCStyle(); } elsif ($typ eq "a") {showAlphaBelow(); } } ($blankLine) && print("\n"); $s = substr($s,$width); $loc += $width; } print "Done.\n"; exit; } # listChars if ($loc != 0 && $showLineNumber) { warn "Can't show line numbers when -j has been used, sorry.\n"; $showLineNumber = 0; } my $fh; if (!$ARGV[0]) { ($user) && die "Can't run interactive AND take data from stdin.\n"; open $fh, "<&STDIN"; } elsif (!-e $ARGV[0]) { warn "Can't find file '$ARGV[0]'.\n"; doHelp(); exit; } else { open $fh, "<$ARGV[0]"; if ($recordSep) { # $INPUT_RECORD_SEPARATOR = $recordSep; $/ = $recordSep; } } if ($startLine > 0) { for (my $i=1; $i<$startLine; $i++) { $lineNumber++; if (!defined readline $fh) { die "Couldn't reach starting line $startLine, EOF at line $i.\n"; } } $loc = tell $fh; ($verbose) && warn "$startLine is at offset $loc\n"; } ################################################################################# # print "Hex/octal/decimal file dump utility, version $version.\n\n"; if ($user) { interact(); } else { while (dumpPage() && ($maxBytes==0 || $bytesDumpedSoFar<$maxBytes)) { } } exit; ################################################################################# sub interact { my $suppress = 0; # Set to avoid redisplaying block my $done = 0; while (!$done) { # Display current screenful of file if ($suppress) { $suppress = 0; } else { dumpPage(); } $prevLoc = $loc; # Save for going back # Get next user command my $ucmd; print("\nCommand (or '?')? "); if (!($ucmd = readline(*STDIN))) { exit; } my @tokens = split(" ",$ucmd); if ($tokens[0] eq "") { shift(@tokens); } my $nparms = scalar @tokens; if (!$tokens[0]) { #$loc += $width*$nlines; next; } my $cmdChar = uc(substr($tokens[0],0,1)); # Movement commands if ($cmdChar eq '+') { # Scroll fwd if (!$tokens[0][1]) { $loc += $width * $nlines; } else { $loc += getNum(substr($tokens[0],1)); } } elsif ($cmdChar eq '-') { # Scroll bkwd if (!$tokens[0][1]) { $loc -= $width*$nlines*2; } else { $loc -= getNum(substr($tokens[0],1)); } } elsif ($cmdChar eq '@') { # Go to offset $loc = getNum(substr($tokens[0],1)); print "*** arg was " . substr($tokens[0],1); #print sprintf("*** Moved to 0x%x = d%d = o%o.\n",$loc,$loc,$loc); } elsif ($cmdChar eq 'B') { # Back to prior loc $loc = $prevLoc; } # Find commands elsif ($cmdChar eq 'F') { # Find my $tgt = strcvt($tokens[1]); my $foundLoc = findString($tgt); if ($foundLoc<0) { print("Unable to find string '$tgt'\n"); } else { my $line = sprintf("Found '%s' at offset 0x%lx (d%ld)\n", $tgt,$foundLoc,$foundLoc); print "$line"; $loc = $foundLoc; $lastTarget = $tgt; } } elsif ($cmdChar eq 'N') { # Find next my $reps = getNum($tokens[1]); if ($reps>0) { $suppress = 1; } my $lastLoc = $startLoc = $loc; for (my $i=1; $i<=$reps; $i++) { my $foundLoc = findString($lastTarget); if ($foundLoc<0) { print("Unable to find string '$lastTarget'\n"); } elsif ($reps>0) { print( "#%04ld: @ 0x%06lx, 0x%06lx from start, 0x%06lx from last\n", $i,$foundLoc,$foundLoc-$startLoc,$foundLoc-$lastLoc); ($LOG) && fprint($LOG, "#%04ld: @ 0x%06lx, 0x%06lx from start, 0x%06lx from last\n", $i,$foundLoc,$foundLoc-$startLoc,$foundLoc-$lastLoc); } else { print("Found '%s' at offset 0x%lx (d%ld)\n", $lastTarget,$foundLoc,$foundLoc); $loc = $lastLoc = $foundLoc; } } # for } elsif ($cmdChar eq 'D') { # Show difference my $i = getNum($tokens[1]); my $j = getNum($tokens[2]); print("x%lx (d%ld) - x%lx (d%ld) = x%lx (d%ld, o%lo)\n", $i,$i,$j,$j,$i-$j,$i-$j, $i-$j); $suppress = 1; } elsif ($cmdChar eq 'L') { # Open log file if (!$LOG) { $ofn = "outfile"; open $LOG, "> $ofn"; if ($LOG!=NULL) { print("'Log' commands will save to file '%s'\n",$ofn); } } my $lines = getNum($tokens[1]); if ($lines<1) { $lines = $nlines; } dumpPage(); print("$lines lines appended to log file.\n"); } elsif ($cmdChar eq 'S') { # Set option setOption(); $suppress = 1; } elsif ($cmdChar eq 'M') { # Macro invocation if ($mdepth>=$MAXMACRODEPTH-1) { print("Maximum macro depth exceeded ($MAXMACRODEPTH).\n"); } else { if (!$tokens[1][0]) { getFN($tokens[1],'r'); } if (($mstack[++$mdepth]=fopen($tokens[1],"r"))==NULL) { print("Unable to open macro file '$tokens[1]'.\n"); $mdepth--; } else { print("Opened macro file '$tokens[1]'.\n"); } } } elsif ($cmdChar eq 'Q') { # Quit last; } elsif ($cmdChar eq '?') { doHelp(); } else { print("\nHuh? ('?' for help) \n"); } } # while not done # Clean up ($fh) && close($fh); ($LOG) && close($LOG); while ($mdepth>=0) { close $mstack[$mdepth--]; } } # sub interact ############################################################################## # Apply escape string to colorize the field if it represents a character to # be colorized. Parameters: # 0: Numeric code point for the character in question # 1: Some string representation for the code point (decimal, hex, etc). # Don't apply and end for every char, since Bash counts the escape strings # into line-length and will force a wrap too early. # sub colorChars { my $theCode = $_[0]; # The code point involved my $theField = $_[1]; # The displayable representation to colorize my $theChar = chr($theCode); if (($cc ne "") && index($cc,$theChar) > -1) { $newColor = $cYellow; } elsif ($colorClass && ($theCode =~ m/\p{$colorClass}/)) { $newColor = $cGreen; } elsif ($colorClassNegated && ($theCode =~ m/\P{$colorClassNegated}/)) { $newColor = $cRed; } elsif ($colorNonASCII && $theCode>=128) { $newColor = $cGreen; } elsif ($colorCONTROLS && ($theCode<32 || ($theCode>=128 && $theCode<160))) { $newColor = $cGreen; } elsif ($colorNL && $theCode==10) { $newColor = $cRed; } elsif ($colorNL && $theCode==13) { $newColor = $cRed; } elsif ($colorXML && ($theCode==60 || $theCode==62 || $theCode==38)) { $newColor = $cGreen; } else { $newColor = $cEnd; } if ($newColor ne $oldColor) { $oldColor = $newColor; return($newColor . $theField); } else { return($theField); } } # colorChars ############################################################################## # Display a block in various forms # Return 0 (FALSE) on EOF. sub dumpPage { seek $fh,$loc,0; if ($showLineNumber) { print "*** In line number $lineNumber ***\n"; } # Make column headers with low nibble of offsets if ($showHead) { print("Low dig: "); my $p1 = my $p2 = ""; for (my $i=0; $i<$width; $i++) { $p1 .= sprintf(" %1lx",($loc+$i) % 16); if ($i % 8 == 7) { $p1 .= " "; } # gutter } for (my $i=0; $i<$width; $i++) { $p2 = sprintf("%1lx",($loc+$i) % 16); } print("$p1 $p2\n"); } # Now put out the real data for (my $l=0; $l<$nlines; $l++) { my $numread = read $fh,$in,$width; if ($numread==0) { print "\n *** END OF FILE ***\n"; return(0); } (my $newlines = $in) =~ s/[^\n]//g; $lineNumber += length($newlines); if ($lastLine eq $in) { if (!$verbose) { if (!$alreadyDidStars) { print "*** (duplicate)\n'$lastLine'\n'$in'\n"; } $alreadyDidStars = 1; next; } } $alreadyDidStars = 0; # Flag whether we still need to show the side chars. # Whoever displays them will clear the flag. $needAlphaSide = showAlphaSide; my $oldway = 0; if ($oldway) { ($showHex) && showHex(); ($showDecimal) && showDecimal(); ($showOctal) && showOctal(); ($showCStyle) && showCStyle(); ($showAlphaBelow) && showAlphaBelow(); } else { if (!$linesToShow) { $linesToShow .= "h a"; } foreach my $typ (split(/ */,$linesToShow)) { if ($typ eq "h") {showHex(); } elsif ($typ eq "d") {showDecimal(); } elsif ($typ eq "o") {showOctal(); } elsif ($typ eq "c") {showCStyle(); } elsif ($typ eq "a") {showAlphaBelow(); } } } if ($oldColor ne $cEnd) { print "$cEnd"; $oldColor = $cEnd; } ($blankLine) && print("\n"); $bytesDumpedSoFar += $width; if ($maxBytes!=0 && $bytesDumpedSoFar>$maxBytes) { last; } $lastLine = $in; $loc += $width; } # for each line of 16 bytes return(1); } # End dumpPage ############################################################################### # Return the string to print for the file offset, in specified base. # Arguments: # $_[0]: Either: # the base of the data to follow on the line, so we can match it # unless overridden by $offsetBase; or # -1, to return blank space instead of the coded offset. # sub showOffset { my $p; my $baseToUse; if ($offsetBase == 0) { # default: make it match the line $baseToUse = $_[0]; } else { $baseToUse = $offsetBase; } if ($baseToUse == 8) { $p = sprintf("o%06lo: ",$loc); } elsif ($baseToUse == 10) { $p = sprintf("d%06ld: ",$loc); } elsif ($baseToUse <= 0) { # blank space of same width $p = sprintf(" ",$loc); } else { $p = sprintf("x%06lx: ",$loc); } return($p); } sub showHex { my $p = showOffset(16); for (my $i=0; $i<$width && $i 0) { return (" " x $needed) . $_[0]; } return(substr($_[0],0,$cwidth)); } ############################################################################### # Return od-like abbreviation for any given character value. # 3 chars wide, to print under the character number. sub getName { my $n = ord($_[0]); my $charName = ""; if ($n<0) { # Bad number $charName = "---"; } elsif ($n<=31) { # C0 set $charName = $C0names[$n]; } elsif ($n<=126) { # G0 set $charName = chr($n); } elsif ($n==127) { # 127 $charName = "DEL"; } elsif ($n<=160) { # C1 set and NBSP if ($codepage eq "1252") { # (Windows pseudo-Latin-1) $charName = chr($n); } else { $charName = $C1names[$n-128]; } } elsif ($n<=255 && $G1ok) { # G1 set $charName = chr($n); } else { # Too high $charName = "+++"; } #my $field = colorChars($_[0],$field); # ??? $charName = colorChars($n,$charName); # ??? return($charName); } # sub getName # Return the C-style representation for any given character value. # Pad to print under the character number. # *** multi-byte characters are not yet supported. sub getCName { my $n = ord($_[0]); my $charName = ""; if ($n<0) { # Bad number $charName = "---"; } elsif ($n>255) { # Too high $charName = "+++"; } elsif ($n==0) { $charName = " \\0"; } # NULL elsif ($n==7) { $charName = " \\a"; } # BELL? elsif ($n==8) { $charName = " \\b"; } # BSP elsif ($n==10) { $charName = " \\n"; } # NL elsif ($n==12) { $charName = " \\f"; } # FF elsif ($n==13) { $charName = " \\r"; } # CR elsif ($n==9) { $charName = " \\t"; } # TAB elsif ($n==11) { $charName = " \\v"; } # VTAB elsif ($n>= 32 && $n<=126) { $charName = " " . chr($n); } else { # C0 $charName = sprintf("%o",$n); } return($charName); } # sub getName # Return a single character to print for any code point, to go in right column. # For printables it's the character itself; for others '.'. sub getPrintChar { my $n = ord($_[0]); my $displayChar = "."; if ($n==10 || $n==13) { $displayChar = $newlineDisplayChar; } elsif ($n== 9 || $n==32) { $displayChar = " "; } elsif ($n==60 || $n==62 || $n==38) { $displayChar = chr($n); } elsif ($n>=32 && $n<=126) { $displayChar = chr($n); } elsif ($G1ok && $n>=161 && $n<=255) { $displayChar = chr($n); } return(colorChars($n,$displayChar)); } # sub getPrintChar ############################################################################### # Subroutines for interactive mode # # strcvt resolves '\' codes while copying string (used for 'find' command) sub strcvt { my $hex = "0123456789ABCDEF"; my $tgt = ""; for (my $i=0; $i < length($_[0]); $i++) { my $c = substr($_[0],$i,1); if ($c!='\\') { $tgt .= $c; next; } $i++; if ($i == length($_[0])) { last; } $c = substr($_[0],$i,1); my $d = ($i+1 < length($_[0])) ? substr($_[0],$i+1,1) : ""; if ($c=='\\') { $tgt .= '\\'; } elsif ($c=='n') { $tgt .= chr(10); } elsif ($c=='r') { $tgt .= chr(13); } elsif ($c=='t') { $tgt .= chr(9); } elsif (index($hex,$c) > -1 && $d && index($hex,$d) > -1) { my $theValue = hex($c.$d); if (!$theValue == 0) { print("Sorry, can't search for NULL yet\n"); $tgt .= ' '; } else { $tgt .= ord($theValue); } } else { print("Bad hex digit after backslash.\n"); } } # for } # sub strcvt # Scan the input for a string, starting at offset specified. # No provision yet for searching for hex strings. # Returns the file offset where it was found, or -1 on failure. sub findString { my $token = $_[0]; my $len = length($token); my $testbuf = ""; if ($len<1) { return(-1); } if ($len>=$MAXTOKENLENGTH) { $len = $MAXTOKENLENGTH-1; } seek($fh,$startLoc,0); # need to update $lineNumber here; while ((my $i=fgetc($fh))!=EOF) { if ($i eq $token) { if ($len==1) { return(tell($fh) - 1); } read $fh,$testbuf,$len-1; # need to update $lineNumber here; seek $fh,-($len-1),1; # Backspace to not miss next possible match $testbuf = substr($testbuf,0,$len-1); if (substr($token,1) eq $testbuf) { return(tell($fh) - 1); } } } # while return(-1); # not found } # End findString # Convert number in various formats to actual quantity. sub getNum { my $val = $_[0]; $val = oct($val) if ($val =~ /^0/); return($val); } # End getNum # Set some named user option. sub setOption { print "Can't set options in interactive mode yet.\n"; } # sub setOption sub makeChar { if (substr($_[0],0,1) ne "\\") { return($_[0]); } if (substr($_[0],1,1) eq "0") { # octal my $value = dec(substr($_[0],1)); return(chr($value)); } if ($_[0] eq "\\n") { return("\n"); } if ($_[0] eq "\\r") { return("\r"); } if ($_[0] eq "\\t") { return("\t"); } } ############################################################################### # Handle alternate character encodings # # This will translate a string to ASCII, which should then be printed in # the right column or a following -a or similar line. # sub recode { my $s = $_[0]; my $charset = $_[1]; my $rc = ""; my @lookup; if ($charset eq "ebcdic") { die "Not supported yet.\n"; my @s0 = qw / nul soh stx etx . ht . del . . . vt ff cr so si /; my @s1 = qw / dle . . . . . bs . can em . . irs igs irs ius /; my @s2 = qw / . . . . . lf etb esc . . . . . enq ack bel /; my @s3 = qw / . . syn . . . . eot . . . . . nak . sub /; my @s4 = qw / \ . . . . . . . . . \[ \. \< \( + | /; my @s5 = qw / \& . . . . . . . . . \] \$ * \) \; \^ /; my @s6 = qw / - \/ . . . . . . . . \| /; push @s6, [",", "%", "_", ">", "?"]; my @s7 = qw / . . . . . . . . . \' \:/; push @s7, ["\#", "\@", "\'", "\=", "\\"]; my @s8 = qw / a b c d e f g h i /; # 81-89 my @s9 = qw / j k l m n o p q r /; # 91-99 my @sa = qw / ~ s t u v w x y z /; # A1-A9 my @sb = qw / A B C D E F G H I /; # C1-C9 my @sc = qw / J K L M N O P Q R /; # D1-D9 my @sd = qw / \\ /; # E0 my @se = qw / S T U V W X Y Z /; # E2-E9 my @sf = qw / 0 1 2 3 4 5 6 7 8 9 /; # F0-F9 my @ebcdic = ( @s0, @s1, @s2, @s3, @s4, @s5, @s6, @s7, @s8, @s9, @sa, @sb, @sc, @sd, @se, @sf); $rc = ""; for (my $i=0; $i&;); controls; nonascii, all; or off. Or a Perl Unicode-class name (experimental), or '!' and a Perl Unicode-class name, for negated. -cwidth n Column width per character (default 3). -blank Put a blank line after each set of translations. -head Put a header with offsets over each set of translations. -jl n Start at line n (see also -lineend). -lineend t Are input line-ends m(ac), d(os), or default u(nix)? (only really matters if using -jl) -listchars Just show a table of Latin-1 characters. -nog1 Do not try to display Latin1/G1 characters (d161-255, xA1-FF). -nocolor Do not display various characters in color (= -color off). -user Use interactive mode (unfinished). -help-user for more info. Known bugs and limitations: Bash will wrap lines too soon if color is used, apparently because it counts the length of the color escapes into the line length. Unicode characters will display as their component bytes, since most terminal programs (such as SecureCRT) assume Latin-1. A major upgrade may eventually do something more useful with wide characters. "; } sub helpUser { print " Interactive mode (not finished): Use '-user', then: Commands: quit Exit this program Move forward one block (i.e., one screenful, see 'lines') + n Move forward n blocks - n Move backward n blocks (default one block) @ n Move to file offset n find s Find string (which may include nonzero \\xx byte codes) next n Find next (with n, finds next n offsets & diffs) back Return to previous place diff n m Subtract m from n, display result in hex/decimal/octal log Append current block to logfile Setting options: chunk n Number of bytes per display line (default 16) header n 0 to hide, 1 to show header line lines n Number of display lines to show at once logfile name Set name of log file to write macro m Start taking commands from specified file By default, Latin-1 printable characters will be shown as themselves, CR and LF will be shown as a red paragraph sign, space and tab as space. Numbers can be provided in decimal (999), octal (0777), or hex (0xFFF). "; }