[prev in list] [next in list] [prev in thread] [next in thread] 

List:       busybox
Subject:    Re: grep extremely slow
From:       walter harms <wharms () bfs ! de>
Date:       2006-04-29 13:46:02
Message-ID: 44536E1A.1090508 () bfs ! de
[Download RAW message or body]

hi rich,
glibc has a getline() function to help with that. I attached a small 
example that reads a file line by line and reports matches.
This is an uncorrected test program, please be careful.
re,
  walter


Rich Felker wrote:
> On Fri, Apr 07, 2006 at 03:24:03AM -0400, Rich Felker wrote:
>> I've observed that busybox grep is still 20x slower than gnu grep,
>> even with the regcomp issue I reported a while back fixed. The problem
>> seems to be the bb_get_chunk_from_file function, which reads a single
>> character at a time from the file using getc. Not sure what's the best
>> way to fix it without breaking semantics needed by other applets..
>> anyone care to take a look?
> 
> Here's an optimized bb_get_chomped_line_from_file:
> 
> char *bb_get_chomped_line_from_file(FILE *file)
> {
> 	size_t idx, len;
> 	char *buf;
> 
> 	for (idx=len=0, buf = NULL; !buf || buf[len-2] != '\n'; ) {
> 		idx = len;
> 		buf = xrealloc(buf, len += 80);
> 		buf[len-2] = '\n';
> 		if (!fgets(buf+idx, len-idx, file)) {
> 			if (!idx) {
> 				free(buf);
> 				return NULL;
> 			}
> 			break;
> 		}
> 	}
> 	for (; buf[idx] && buf[idx] != '\n'; idx++);
> 	buf[idx] = 0;
> 	return buf;
> }
> 
> This seems to improve performance by several times, but it's still
> about 6-8x slower than gnu grep from my testing...
> 
> Rich
> 
> _______________________________________________
> busybox mailing list
> busybox@busybox.net
> http://busybox.net/cgi-bin/mailman/listinfo/busybox
> 
> 
> 

["mygrep.c" (text/x-csrc)]

/*
  simple grep
  read file line by line
  cmp with regexp
  print lines that match
*/

#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Regular-expression settings that check_lines() hands to regcomp(). */
struct re_conf {
  char *pattern;  /* regular expression source text */
  int reflags;    /* cflags argument for regcomp(), e.g. REG_ICASE */
};

/*
 * Strip trailing whitespace from s in place.
 *
 * Every trailing character for which isspace() is true is overwritten
 * with '\0'.  Returns s itself so that calls can be chained.
 */
static char *del_tail_space(char *s)
{
  /* size_t, not int: strlen() may exceed INT_MAX on very long strings. */
  size_t len = strlen(s);

  /* Cast to unsigned char: a negative char passed to isspace() is undefined. */
  while (len > 0 && isspace((unsigned char)s[len - 1])) {
    s[--len] = '\0';
  }

  return s;
}

/*
 * Skip leading whitespace.
 *
 * Returns a pointer to the first character of s for which isspace() is
 * false (possibly the terminating '\0').  The string is not modified.
 */
static char *del_head_space(char *s)
{
  /* Cast to unsigned char: a negative char passed to isspace() is undefined.
     The terminating '\0' is not whitespace, so the loop stops there. */
  while (isspace((unsigned char)*s)) {
    s++;
  }

  return s;
}


/*
 * Trim whitespace from both ends of s.
 *
 * Trailing whitespace is removed in place; the returned pointer is advanced
 * past any leading whitespace, so it may point into the middle of s.
 */
char *chop(char *s)
{
  char *tail_trimmed = del_tail_space(s);

  return del_head_space(tail_trimmed);
}




/*
  Enter a recognised line into the db (original design note; the code
  below only prints the detected keyword).
  Lines that cannot be identified are always an error.

  The first three characters of line are compared against the table of
  known keywords.  On a match "detect:<keyword>" is printed and 0 is
  returned; otherwise 1 is returned.
*/
int process_line(char *line)
{
  char *keywords[] = {
    "LID", "LCO", "TNR", "TFN", "BTT", "BTI", "ATB", "SWT", "LWP",
    "SMP", "TSO", "TSC", "SMF",
    NULL  /* end-of-table marker */
  };
  int idx;

  for (idx = 0; keywords[idx] != NULL; idx++) {
    if (strncmp(keywords[idx], line, 3) != 0)
      continue;

    printf("detect:%s\n", keywords[idx]);
    return 0;
  }

  return 1;
}


/*
 * fopen() wrapper that never returns NULL.
 *
 * The name "-" selects stdin.  Any other name is passed to fopen() with
 * the given mode; if that fails, a message is written to stderr and the
 * program exits with status 1.
 */
FILE *xfopen(char *name,char *mode)
{
        FILE *stream;

        if (strcmp(name, "-") == 0)
          return stdin;

        stream = fopen(name, mode);
        if (stream != NULL)
          return stream;

        fprintf(stderr,"can not open %s:%s\n",name,strerror(errno));
        exit(1);
}

/*
 * Scan fp line by line and report every line that matches the regular
 * expression described by reconf.
 *
 * For each matching line "test: <line>" is printed, followed by "ok" when
 * process_line() recognises the line and "error" when it does not.
 *
 * Returns EXIT_SUCCESS once the stream has been read to the end, or
 * EXIT_FAILURE (after a message on stderr) when the pattern cannot be
 * compiled.
 */
int check_lines(FILE *fp,struct re_conf *reconf)
{
  char *line = NULL;   /* buffer owned by getline(); freed below */
  size_t cap = 0;
  regex_t regex;
  int rc;

  /* regexec() on a regex_t that failed to compile is undefined, so bail out. */
  rc = regcomp(&regex, reconf->pattern, reconf->reflags);
  if (rc != 0) {
    char errbuf[128];

    regerror(rc, &regex, errbuf, sizeof errbuf);
    fprintf(stderr, "can not compile pattern '%s': %s\n",
            reconf->pattern, errbuf);
    return EXIT_FAILURE;
  }

  while (getline(&line, &cap, fp) != -1) {
    if (regexec(&regex, line, 0, NULL, 0) != 0)
      continue;

    /* line still carries its trailing newline, if the input had one. */
    printf("test: %s", line);

    /* process_line() signals an unidentified line with a non-zero value;
       testing for "< 0" would never report the error. */
    if (process_line(line) != 0)
      printf("error\n");
    else
      printf("ok\n");
  }

  regfree(&regex);
  free(line);   /* free(NULL) is a no-op, no guard needed */

  return EXIT_SUCCESS;
}


int main()
{
  char *fname="controld.conf";
  struct re_conf reconf= {
  .reflags=0
  };

  FILE *fp;

  fp=xfopen(fname,"r");

  /*
     REG_ICASE  ignore case
     REG_NEWLINE 
     Match-any-character operators don't match a newline
  */

  reconf.pattern="^[A-Z][A-Z][A-Z]";
  reconf.reflags= REG_ICASE;

  check_lines(fp,&reconf);

  fclose(fp);
  return 0;
}


_______________________________________________
busybox mailing list
busybox@busybox.net
http://busybox.net/cgi-bin/mailman/listinfo/busybox

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic