[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-security-audit
Subject: argcount script
From: Antonomasia <ant () notatla ! demon ! co ! uk>
Date: 2000-06-30 23:51:56
[Download RAW message or body]
This now handles %% in formats and %m in syslog(3) formats.
Running it in practice on ftpd turns up numerous false positives
due to incomplete recognition of [()",] in deciding the boundaries
of function arguments.
A crude way to filter most of these is:
argcount.plx ftpd.c | awk -F'"' '{if (!$NF%2) print}'
but if people have better suggestions I'd welcome them.
#!/usr/bin/perl -w
require 5.004;
# Source code scanner for argument counting in C.
# See http://www.notatla.demon.co.uk/SOFTWARE for details.
# GNU General Public License, version 2 or later.
# $Id: argcount.plx,v 1.10 2000/06/30 23:36:11 ant Exp $
# remaining defects marked with XXX
# Horst von Brand <vonbrand@sleipnir.valparaiso.cl> suggests
# gcc -Wall -Wmissing-prototypes -Wmissing-declarations ,
# which is good, but cannot check variables used as formats.
#
# sprintf(s, "PASV port %i assigned to %s", i, remoteident);
# syslog(LOG_DEBUG, s);
#
# Here the programmer has to determine that the string remoteident
# does not contain formatting.
# Table of format-taking C functions.  Each key is a function name and
# each value is the zero-based position of its format-string argument.
# The set is the one found on RH5.1, plus buf_appendf() which comes
# from an application.
%uglies=();
# format string is the first argument
@uglies{qw(printf scanf vprintf)} = (0) x 3;
# format string is the second argument
@uglies{qw(fprintf fscanf sprintf sscanf syslog
           vfprintf vsprintf buf_appendf)} = (1) x 8;
# format string is the third argument (after buffer and size)
@uglies{qw(snprintf vsnprintf)} = (2) x 2;
############################################
sub SCAN_ONE_SRC_FILE
{
    # Scan one C source file for calls to the functions named in %uglies
    # and write a report line to OUT for every call whose format is not a
    # string literal, or whose literal format has a different number of
    # conversion specifications than there are following arguments.
    #
    # Arguments:
    #   the name of the file to read, optionally preceded by "." to say
    #   that the file is preprocessor output whose code lines carry a
    #   leading "N: " label giving the original line number.
    # Returns:
    #   "" if the file cannot be opened (a message is appended to
    #   $duffstuff); otherwise $answer, which nothing fills in yet, so the
    #   result is always false.
    # Globals read:    %uglies, $verbose, $debug, OUT
    # Globals written: $duffstuff, $lastline, $debug (cleared at the end)
    my $srcname;        # what source file to read
    my $src;            # line of source under scrutiny
    my $fcall;          # name of a function
    my $properties;     # our hardcoded description of a function
    my $argnum;         # position of the format argument, from 0
    my $rc;             # return code
    my $answer="";      # return value of this function
    my @tmpprops;
    my $ln=0;           # are line numbers taken from "N: " labels ?
    my $line;           # line number (or label) to report
    my @junk;           # comma-separated pieces after the format position
    my @percent;        # the format split on percent signs
    my $wanted;         # arguments the format asks for
    my $got;            # arguments actually following the format
    my $cs;             # index into @percent

    $srcname=shift;
    # Use dot as a special filename to flag a preprocessed file.
    # Should not match a real filename because dot means a directory.
    if ("." eq $srcname) {$ln=1; $srcname=shift;}

    $rc=open SRC, "<$srcname";
    if (!$rc) {
        $duffstuff = $duffstuff."\nCannot open ". $srcname;
        return "";
    }

    SOURCEFILE: while (<SRC>) {
        chomp;
        $src=$_;

        # This is where we track line numbers - the possibilities are:
        #  - the line starts with an "N: " label: use N and remember it
        #  - the line has no label (e.g. a macro continuation line):
        #    reuse the most recent label
        #  - not a preprocessed file: use the input line number
        # The label must be at the start of the line; an unanchored
        # match would mistake any digit in the code for a line number.
        if ($ln) {
            if ($src =~ /^\s*([0-9]+):/) {
                $lastline=$1;
            }
            $line=$lastline;
        } else {
            $line=$.;
        }

        # Loop over uglies to see if any appear in the line.
        # If so, note what it was and what argument appeared, and the
        # position in the source.
        while (($fcall,$properties) = each(%uglies) ) {
            if ($debug) {printf(OUT "FCALL IS %s\n",$fcall);}
            @tmpprops=split(/,/,$properties);
            while(@tmpprops) {
                $argnum=shift(@tmpprops);
                if ($debug) {
                    printf(OUT " ARGNUM IS %s\n",$argnum);
                }
                $_=$src;
                # first test and remove function name and preceding args
                if (s/(.*)\b$fcall\(([^,]+,){$argnum}//) {
                    # remove (and count) following args and whitespace
                    # XXX breaks if parens or quoted commas appear
                    $wanted=$got=0;
                    @junk=split(/,/);
                    $got=(@junk)-1 if (@junk > 1);
                    s/[,)].*// ;
                    s/\s+//g ;
                    @percent=split(/%/);

                    # Count the conversion specs; a number similar to
                    # the number of percent signs.  Field 0 is the text
                    # before the first percent sign and is never a spec.
                    $cs=1;
                    while ($cs < @percent) {
                        if ("" eq $percent[$cs]) {
                            # An empty field means "%%": skip it and the
                            # literal text that follows it.
                            printf(OUT " EMPTY %%\n") if ($debug);
                            $cs+=2;
                            next;
                        }
                        # syslog() has %m as a special case.
                        $wanted++ unless (("syslog" eq $fcall)&&
                                          ($percent[$cs] =~ /^m/));
                        $cs++;
                    }

                    if ($verbose){
                        printf(OUT "%8d: %s\n", $., $src);
                        if ($verbose>1){
                            printf(OUT " %s(%s)\n",$fcall, $_);
                        }
                    }

                    # $line is printed with %8s so that the non-numeric
                    # fallback label shows as itself; numeric labels
                    # look exactly as they did with %8d.
                    if ( $_ =~ /"[^"]*"/ ) {
                        # fixed format string
                        if ($wanted != $got){
                            printf(OUT "FORMAT (%d!=%d) %8s: %s %s\n",
                                   $wanted, $got,$line,$fcall,$_);
                        }
                    } else {
                        # variable format string
                        printf(OUT "FORMAT %8s: %s %s\n",
                               $line, $fcall, $_);
                    }
                }
            }
        }
    }
    close(SRC); # end of reading that source file
    $debug=0;   # debug mode ends after one input file
    return $answer;
}
############################################
# MAIN starts here
#
# Parses the command line, then scans each named file with
# SCAN_ONE_SRC_FILE.  With -E each file is first copied with "N: " line
# labels added, run through "gcc -E", and the preprocessed result is
# scanned instead.  Report output goes to the file named by the OUT
# environment variable (appended) or to STDOUT.
$debug=0;
$verbose=0;
$cpp=0;
$tmpdir="";
$duffstuff=""; # to hold error messages
$lastline="???"; # fallback used in tracing difficult line numbers
@cflags=(); # -D/-I/-L switches to hand on to gcc, one per element
$help="argcount.plx [-v] [-d] [-h] [-E] [-D...] [-I...] [-L...] file1.c ...
-v verbose - can be used more than once
-d debug
-h this usage message
-E 'cpp -E' expansion of source files
-D -I and -L parameters are passed to gcc if -E is in use
";
# direct output to $ENV{"OUT"}, else STDOUT
if ($ENV{"OUT"}) {
    my $outfilename=$ENV{"OUT"};
    open(OUT, ">>$outfilename") or
        die("Cannot append to $outfilename: $!\n");
} else {
    open(OUT, ">&STDOUT") or die("Cannot duplicate STDOUT: $!\n");
}
@clargs=@ARGV;
SWITCHES: while (@clargs) {
    my $arg=$clargs[0];
    # switches start with a -
    last SWITCHES unless ($arg =~ /^-/) ;
    shift @clargs;
    if ($arg =~ /^-[ILD]/) {
        # kept as separate list elements so that they reach gcc exactly
        # as typed, without a second round of shell interpretation
        push(@cflags, $arg);
        next SWITCHES;
    }
    # -- means end of switches
    last SWITCHES if $arg eq "--";
    $arg=substr($arg, 1);
    # now read it char-by-char - there are no long options
    # (test the length, not the truth, so a "0" is not silently dropped)
    while (length($arg)) {
        my $opt=substr($arg, 0, 1);
        if ("v" eq $opt) {
            $verbose++;
        } elsif ("d" eq $opt) {
            $debug=1;
        } elsif ("E" eq $opt) {
            $cpp=1;
        } else {
            $|=1;
            printf(OUT "Confused by: %s\n",$arg)
                unless ("h" eq $opt);
            die($help);
        }
        $arg=substr($arg, 1);
    }
}
FILENAMES: while (@clargs) {
    my $text;
    if ("." eq $clargs[0]) {shift @clargs; next FILENAMES;}
    if ($cpp) {
        my $count=1;
        my $hinclude=0;
        my ($dirname, $basename, $tmpfile, $tmpfile2);
        my @gccflags;
        my $pid;
        # run gcc -E on the file and scan the output
        if (! $tmpdir) {
            # Make a private scratch directory.  Retry on a name clash,
            # but give up after a bounded number of attempts so that a
            # missing or unwritable TMPDIR cannot loop forever.
            my $tmpbase=$ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp";
            my $tries=0;
            do {
                die("Cannot create a temporary directory in $tmpbase: $!\n")
                    if (++$tries > 100);
                $tmpdir=$tmpbase."/".int(rand(1000000000));
            } while (0==mkdir($tmpdir, 0700));
        }
        if ($clargs[0] =~ /(.*)\/(.*)/) {
            ($dirname,$basename)=($1,$2);
            # a file directly under the root directory
            $dirname="/" if ("" eq $dirname);
        } else {
            $dirname=".";
            $basename=$clargs[0];
        }
        if ($debug) {
            printf(OUT "dirname is %s\n",$dirname);
            printf(OUT "basename is %s\n",$basename);
        }
        $tmpfile=$tmpdir."/".$basename;
        # Copy the source file to another file with added line
        # numbers, except on cpp directive lines.
        open(TF, ">$tmpfile") or
            die("Cannot write file with line numbers.");
        # The explicit "<" stops a name ending in "|" from being run
        # as a command.
        open(IF, "<$clargs[0]") or
            die("Cannot read source file.");
        while (<IF>){
            if ( ($_ =~ /^#.*/) || ($hinclude) ) {
                print TF $_;
                # avoid numbering preprocessor directives
                # including continuation lines
                $hinclude=1;
                $hinclude=0 unless ($_ =~ /.*\\$/);
            } else {
                printf TF "%d: %s" , $count, $_;
            }
            $count++;
        }
        close(IF);
        close(TF) or die("Cannot write file with line numbers.");
        # Now run gcc -E on that to remove comments, expand macros,
        # decide ifdefs and whatever else cpp does.
        $tmpfile2=$tmpfile."E";
        open(TF, ">$tmpfile2") or
            die("Cannot write file of gcc output.");
        # With no -D/-I/-L switches given, let gcc find headers that
        # live beside the source file.
        @gccflags=@cflags;
        @gccflags=("-I$dirname") unless (@gccflags);
        # Fork with a pipe from the child and exec gcc with an argument
        # list, so that no shell ever sees the file name or the flags.
        $pid=open(IF, "-|");
        die("Cannot run gcc -E.") unless (defined($pid));
        if (0==$pid) {
            exec("gcc", "-E", @gccflags, $tmpfile);
            die("Cannot run gcc -E.");
        }
        while (<IF>){
            print TF;
        }
        # close() on a pipe reports the exit status of the command;
        # note a failure but still scan whatever output was produced.
        close(IF) or
            $duffstuff = $duffstuff."\ngcc -E failed on ". $clargs[0];
        close TF;
        $text=SCAN_ONE_SRC_FILE (".",$tmpfile2);
        unlink $tmpfile, $tmpfile2;
    } else {
        # simple case (no gcc) just scan the source file
        $text=SCAN_ONE_SRC_FILE ($clargs[0]);
    }
    # Results are printed here as a separate step from the
    # SCAN_ONE_SRC_FILE function in case they needed a bit of
    # massaging. Seems not so far.
    if ($text) {
        printf(OUT "%s\n",$clargs[0]);
        printf(OUT "%s",$text);
    }
    # move on to next source file named on the command line
    shift @clargs;
}
# show error messages and tidy up
printf OUT "%s\n",$duffstuff;
if ($tmpdir) {rmdir $tmpdir;}
close(OUT);
--
##############################################################
# Antonomasia ant@notatla.demon.co.uk #
# See http://www.notatla.demon.co.uk/ #
##############################################################
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic