cleanfile 3.46 KB
Newer Older
1
#!/usr/bin/env perl
2
# SPDX-License-Identifier: GPL-2.0
3 4 5 6 7
#
# Clean a text file -- or directory of text files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#

8
use warnings;
9 10 11
use bytes;
use File::Basename;

12 13 14
# Default options
$max_width = 79;

15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
sub clean_space_tabs($)
{
    no bytes;			# Tab alignment depends on characters

    my($li) = @_;
    my($lo) = '';
    my $pos = 0;
    my $nsp = 0;
    my($i, $c);

    for ($i = 0; $i < length($li); $i++) {
	$c = substr($li, $i, 1);
	if ($c eq "\t") {
	    my $npos = ($pos+$nsp+8) & ~7;
	    my $ntab = ($npos >> 3) - ($pos >> 3);
	    $lo .= "\t" x $ntab;
	    $pos = $npos;
	    $nsp = 0;
	} elsif ($c eq "\n" || $c eq "\r") {
	    $lo .= " " x $nsp;
	    $pos += $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos = 0;
	} elsif ($c eq " ") {
	    $nsp++;
	} else {
	    $lo .= " " x $nsp;
	    $pos += $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos++;
	}
    }
    $lo .= " " x $nsp;
    return $lo;
}

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
# Compute the visual width of a string
sub strwidth($) {
    no bytes;			# Tab alignment depends on characters

    my($li) = @_;
    my($c, $i);
    my $pos = 0;
    my $mlen = 0;

    for ($i = 0; $i < length($li); $i++) {
	$c = substr($li,$i,1);
	if ($c eq "\t") {
	    $pos = ($pos+8) & ~7;
	} elsif ($c eq "\n") {
	    $mlen = $pos if ($pos > $mlen);
	    $pos = 0;
	} else {
	    $pos++;
	}
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

80 81
$name = basename($0);

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
@files = ();

while (defined($a = shift(@ARGV))) {
    if ($a =~ /^-/) {
	if ($a eq '-width' || $a eq '-w') {
	    $max_width = shift(@ARGV)+0;
	} else {
	    print STDERR "Usage: $name [-width #] files...\n";
	    exit 1;
	}
    } else {
	push(@files, $a);
    }
}

foreach $f ( @files ) {
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
    print STDERR "$name: $f\n";

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	next;
    }

    if (!open(FILE, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	next;
    }

    binmode FILE;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    $is_binary = 0;

    while (read(FILE, $data, 65536) > 0) {
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	next;
    }

    seek(FILE, 0, 0);

    $in_bytes = 0;
    $out_bytes = 0;
    $blank_bytes = 0;

    @blanks = ();
    @lines  = ();
137
    $lineno = 0;
138 139

    while ( defined($line = <FILE>) ) {
140
	$lineno++;
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
	$in_bytes += length($line);
	$line =~ s/[ \t\r]*$//;		# Remove trailing spaces
	$line = clean_space_tabs($line);

	if ( $line eq "\n" ) {
	    push(@blanks, $line);
	    $blank_bytes += length($line);
	} else {
	    push(@lines, @blanks);
	    $out_bytes += $blank_bytes;
	    push(@lines, $line);
	    $out_bytes += length($line);
	    @blanks = ();
	    $blank_bytes = 0;
	}
156 157 158 159 160 161

	$l_width = strwidth($line);
	if ($max_width && $l_width > $max_width) {
	    print STDERR
		"$f:$lineno: line exceeds $max_width characters ($l_width)\n";
	}
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
    }

    # Any blanks at the end of the file are discarded

    if ($in_bytes != $out_bytes) {
	# Only write to the file if changed
	seek(FILE, 0, 0);
	print FILE @lines;

	if ( !defined($where = tell(FILE)) ||
	     !truncate(FILE, $where) ) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}
    }

    close(FILE);
}