[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-security-audit
Subject:    argcount script
From:       Antonomasia <ant () notatla ! demon ! co ! uk>
Date:       2000-06-30 23:51:56
[Download RAW message or body]

This now handles %% in formats and %m in syslog(3) formats.
Running it in practice on ftpd turns up numerous false positives
due to incomplete recognition of [()",]  in deciding the boundaries
of function arguments.

A crude way to filter most of these is:
    argcount.plx  ftpd.c  | awk -F'"' '{if (!$NF%2) print}'
but if people have better suggestions I'd welcome them.
    


#!/usr/bin/perl -w
require 5.004;

# Source code scanner for argument counting in C.
# See http://www.notatla.demon.co.uk/SOFTWARE for details.
# Gnu Public Licence V2 or later.

# $Id: argcount.plx,v 1.10 2000/06/30 23:36:11 ant Exp $

# remaining defects marked with XXX

# Horst von Brand <vonbrand@sleipnir.valparaiso.cl> suggests
# gcc -Wall -Wmissing-prototypes -Wmissing-declarations ,
# which is good, but cannot check variables used as formats.
#
#   sprintf(s, "PASV port %i assigned to %s", i, remoteident);
#   syslog(LOG_DEBUG, s);
#
#   Here the programmer has to determine that the string remoteident
#   does not contain formatting.


# Table of format-string functions to look for.  Each key is a C function
# name; each value is the zero-based position of its format argument.
# The names are those found on RH5.1, plus buf_appendf() which comes
# from an application.  Entries are grouped here by format position.

%uglies = ();

# format is the first argument
@uglies{qw(printf scanf vprintf)} = (0) x 3;

# format is the second argument
@uglies{qw(fprintf fscanf sprintf sscanf syslog
           vfprintf vsprintf buf_appendf)} = (1) x 8;

# format is the third argument
@uglies{qw(snprintf vsnprintf)} = (2) x 2;


############################################
# SCAN_ONE_SRC_FILE([".",] $srcname)
#
# Read $srcname line by line and look for calls to the functions named in
# %uglies.  For each call found, compare the number of conversion
# specifications in the format with the number of arguments following it
# and report on the OUT filehandle:
#   - "FORMAT (wanted!=got) line: func format"  for a literal format whose
#     counts disagree;
#   - "FORMAT line: func format"  for any format that is not a string
#     literal (a variable used as a format).
#
# A leading "." argument flags a preprocessed file whose lines carry
# "NNN:" labels holding the original line numbers; those labels are then
# reported instead of the physical line number.  Unlabelled lines reuse
# the previous label, kept in the global $lastline.
#
# Globals used: %uglies, $debug, $verbose, $duffstuff, $lastline, OUT.
# $debug is cleared once the file has been read.
#
# Returns "" (after appending a message to $duffstuff) if the file cannot
# be opened; otherwise returns $answer, which is never assigned, so the
# caller sees undef.
sub SCAN_ONE_SRC_FILE 
{
    my $srcname;                # what source file to read
    my $src;                    # line of source under scrutiny
    my $fcall;                  # name of a function
    my $properties;             # our hardcoded description of a function
    my $argnum;                 # position of the format argument
    my $rc;                     # return code
    my $answer;                 # return code from this function
    my @tmpprops;
    my $line;                   # line number (or label) to report
    my $ln=0;                   # do we need to fake line numbers ?

    $srcname=shift;
    # Use dot as a special filename to flag a preprocessed file.
    # Should not match a real filename because dot means a directory.
    if ("." eq $srcname) {$ln=1; $srcname=shift;}

    $rc=open SRC, "<$srcname";
    if (!$rc) {
        $duffstuff = $duffstuff."\nCannot open ". $srcname;
        return "";
    }

    SOURCEFILE: while (<SRC>) {
        chomp;
        $src=$_;

        # Work out which line number to report:
        #  - in preprocessed mode, the numeric label at the start of the
        #    line, or the previous label for lines that have none
        #    (macro continuation lines, cpp line markers);
        #  - otherwise the line number of the input file.
        # The label match is anchored so that digits elsewhere on an
        # unlabelled line are not mistaken for a label.
        if ($ln) {
            if ($src =~ /^\s*([0-9]+):/) {
                $lastline=$1;
            }
            $line=$lastline;
        } else {
            $line=$.;
        }

        # Loop over uglies to see if any appear in the line.
        while (($fcall,$properties) = each(%uglies) ) {
            if ($debug) {printf(OUT "FCALL IS %s\n",$fcall);}

            @tmpprops=split(/,/,$properties);

            while (@tmpprops) {
                $argnum=shift(@tmpprops);
                if ($debug) {
                    printf(OUT "   ARGNUM IS %s\n",$argnum);
                }

                # first test and remove function name and preceding args
                my $rest=$src;
                next unless ($rest =~ s/(.*)\b\Q$fcall\E\(([^,]+,){$argnum}//);

                # count the arguments that follow the format, then cut
                # the text down to the format itself, minus whitespace
                # XXX breaks if parens or quoted commas appear
                my @pieces=split(/,/, $rest);
                my $got=(@pieces > 1) ? @pieces-1 : 0;
                $rest =~ s/[,)].*// ;
                $rest =~ s/\s+//g ;

                # count the conversion specs; a number
                # similar to the number of percent signs
                my @percent=split(/%/, $rest);
                my $wanted=0;
                my $cs;
                for ($cs=1; $cs < @percent; $cs++) {
                    if ("" eq $percent[$cs]) {
                        # An empty field means "%%": skip the field
                        # after it too, since it follows a literal
                        # percent sign and is not a conversion.
                        $cs++;
                        printf(OUT " EMPTY %%\n") if ($debug);
                        next;
                    }
                    # syslog() has %m as a special case.
                    $wanted++ unless (("syslog" eq $fcall) &&
                                      ($percent[$cs] =~ /^m/));
                }

                if ($verbose) {
                    printf(OUT "%8d:  %s\n", $., $src);
                    if ($verbose>1) {
                        printf(OUT "         %s(%s)\n",$fcall, $rest);
                    }
                }

                # %8s rather than %8d for the line: it prints integers
                # identically and copes with the "???" fallback label.
                if ( $rest =~ /"[^"]*"/ ) {
                    # fixed format string
                    if ($wanted != $got) {
                        printf(OUT "FORMAT (%d!=%d) %8s:     %s %s\n",
                                   $wanted, $got, $line, $fcall, $rest);
                    }
                } else {
                    # variable format string
                    printf(OUT "FORMAT %8s:     %s %s\n",
                               $line, $fcall, $rest);
                }
            }
        }
    }
    close(SRC);       # end of reading that source file

    $debug=0; # debug mode ends after one input file

    return $answer;
}

############################################

# MAIN starts here

# Global settings and state shared by the switch parser, the file loop
# and SCAN_ONE_SRC_FILE.
$debug=0;            # set by -d
$verbose=0;          # incremented by each -v
$cpp=0;              # set by -E: preprocess with gcc before scanning
$tmpdir="";          # scratch directory, created on first use in -E mode
$duffstuff="";       # to hold error messages
$lastline="???";     # fallback used in tracing difficult line numbers
$cflags="";          # -D/-I/-L switches collected for gcc

# Usage message.  Only the switches that the parser accepts are listed,
# and the program name is taken from $0 so it matches the invocation.
$help=$0 . " [-v] [-d] [-h] [-E] [-D...] [-I...] [-L...] file1.c ...
          -v   verbose - can be used more than once
          -d   debug
          -h   this usage message
          -E   'cpp -E' expansion of source files
          -D -I and -L parameters are passed to gcc if -E is in use
";


# Direct output to the file named in $ENV{"OUT"} (opened for append),
# else to a duplicate of STDOUT.  Every report in this script is written
# to OUT, so give up straight away if it cannot be opened.
if ($ENV{"OUT"}) {
     my $outfilename=$ENV{"OUT"};
     open(OUT, ">>$outfilename")
          or die("Cannot append to output file $outfilename: $!\n");
} else {
     open(OUT, ">&STDOUT")
          or die("Cannot duplicate STDOUT: $!\n");
}

# Parse the leading command-line switches.  Works on a copy of @ARGV;
# whatever is left in @clargs afterwards is the list of source files.
@clargs=@ARGV;


SWITCHES: while (@clargs) {
   my $arg=$clargs[0];
# switches start with a -
   last SWITCHES unless ($arg =~ /^-/) ;
   shift @clargs;

# -I, -L and -D (with their attached values) are collected for gcc
   if ($arg =~ /^-[ILD]/) {
                  $cflags .= " ". $arg;
                  next SWITCHES;
   }

# -- means end of switches
   last SWITCHES if $arg eq "--";

      $arg=substr($arg, 1);
# now read it char-by-char - there are no long options.
# Test the length rather than the truth of $arg, so that a "0"
# character is rejected like any other unknown switch.
      while (length($arg)) {
          my $flag=substr($arg, 0, 1);
          if    ("v" eq $flag) {$verbose++;}
          elsif ("d" eq $flag) {$debug=1;}
          elsif ("E" eq $flag) {$cpp=1;}
          else {
                # unknown switch, or -h: show the usage message and stop
                $|=1;     # unbuffer the currently selected handle
                printf(OUT "Confused by: %s\n",$arg)
                        unless ("h" eq $flag);
                die($help);
          }
          $arg=substr($arg, 1);
      }
}

# Scan each source file named on the command line.
#
# Without -E the file is scanned as it stands.  With -E:
#   1. a copy with "NNN:  " line-number labels is written into a private
#      scratch directory (preprocessor directives are left unlabelled);
#   2. gcc -E is run on that copy and its output saved;
#   3. the saved output is scanned, with "." passed first to tell
#      SCAN_ONE_SRC_FILE to report the labels as line numbers.
# Problems that need not stop the run are appended to $duffstuff.
FILENAMES: while (@clargs) {
    my $text;
    # "." is reserved as the preprocessed-file flag, so never scan it
    if ("." eq $clargs[0]) {shift @clargs; next FILENAMES;}
    if ($cpp) {
        my $count=1;
        my $hinclude=0;
        my ($dirname, $basename, $tmpfile, $tmpfile2, $gccflags);

        # Create the scratch directory on first use.  mkdir fails if the
        # name already exists, so retry with another random name, but
        # give up eventually: an unusable parent directory would
        # otherwise keep this loop spinning for ever.
        if (! $tmpdir) {
            my $parent=$ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp";
            my $tries=0;
            do {
               die("Cannot create a temporary directory in $parent: $!\n")
                    if ($tries++ >= 100);
               $tmpdir=$parent."/".int(rand(1000000000));
            } while (!mkdir($tmpdir, 0700));
        }

        # split the path at its last slash
        if ($clargs[0] =~ /(.*)\/(.*)/) {
            $dirname=$1;
            $basename=$2;
            # a file directly under the root has an empty directory part
            if ("" eq $dirname) {$dirname="/";}
        }
        if (!$dirname)  {$dirname=".";}
        if (!$basename) {$basename=$clargs[0];}
        if ($debug) {
               printf(OUT "dirname is %s\n",$dirname);
               printf(OUT "basename is %s\n",$basename);
        }
        $tmpfile=$tmpdir."/".$basename;

        # Copy the source file to another file with added line
        # numbers, except on cpp directive lines.
        open TF, ">$tmpfile" or die("Cannot write file with line numbers.");
        # Explicit "<" so that the name is only ever opened for
        # reading and never treated as a command or an output mode.
        open IF, "<$clargs[0]" or
             die("Cannot read source file.");
        while (<IF>) {
            if ( /^#/ || $hinclude ) {
                # avoid numbering preprocessor directives
                # including continuation lines
                print TF $_;
                $hinclude=(/\\$/) ? 1 : 0;
            } else {
                printf TF "%d:  %s" , $count, $_;
            }
            $count++;
        }
        close IF;
        close TF or die("Cannot write file with line numbers.");

        # Now run gcc -E on that to remove comments, expand macros,
        # decide ifdefs and whatever else cpp does.
        # With no -I/-L/-D switches given, search the source file's
        # own directory for headers, for this file only.
        $tmpfile2=$tmpfile."E";
        $gccflags=$cflags ? $cflags : "-I$dirname";
        open TF, ">$tmpfile2" or die("Cannot write file of gcc output.");
        # NOTE(review): the command goes through the shell, so shell
        # metacharacters in the switches or the file name are
        # interpreted by it.
        open IF, "gcc -E $gccflags $tmpfile |" or
             die("Cannot run gcc -E.");
        while (<IF>) {
            print TF;
        }
        # close on a pipe waits for gcc and reports its exit status;
        # note a failure but still scan whatever output there was
        if (!close(IF)) {
            $duffstuff = $duffstuff."\ngcc -E failed on ". $clargs[0];
        }
        close TF or die("Cannot write file of gcc output.");

        $text=SCAN_ONE_SRC_FILE (".",$tmpfile2);
        unlink $tmpfile, $tmpfile2;
    } else {
        # simple case (no gcc) just scan the source file
        $text=SCAN_ONE_SRC_FILE ($clargs[0]);
    }
    # Results are printed here as a separate step from the
    # SCAN_ONE_SRC_FILE function in case they needed a bit of
    # massaging.  Seems not so far.
    if ($text) {
         printf(OUT "%s\n",$clargs[0]);
         printf(OUT "%s",$text);
    }
    # move on to next source file named on the command line
    shift @clargs;
}

# Finish: write out the collected error messages (followed by a newline),
# remove the scratch directory if one was made, and close the output.
print OUT $duffstuff, "\n";
rmdir $tmpdir if ($tmpdir);
close(OUT);

--
##############################################################
# Antonomasia   ant@notatla.demon.co.uk                      #
# See http://www.notatla.demon.co.uk/                        #
##############################################################

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic