[prev in list] [next in list] [prev in thread] [next in thread]
List: busybox
Subject: Re: grep extremely slow
From: walter harms <wharms () bfs ! de>
Date: 2006-04-29 13:46:02
Message-ID: 44536E1A.1090508 () bfs ! de
[Download RAW message or body]
hi rich,
glibc has a getline() function to help with that. I attached a small
example that reads a file line by line and reports matches.
This is an uncorrected test program, please be careful.
re,
walter
Rich Felker wrote:
> On Fri, Apr 07, 2006 at 03:24:03AM -0400, Rich Felker wrote:
>> I've observed that busybox grep is still 20x slower than gnu grep,
>> even with the regcomp issue I reported a while back fixed. The problem
>> seems to be the bb_get_chunk_from_file function, which reads a single
>> character at a time from the file using getc. Not sure what's the best
>> way to fix it without breaking semantics needed by other applets..
>> anyone care to take a look?
>
> Here's an optimized bb_get_chomped_line_from_file:
>
> char *bb_get_chomped_line_from_file(FILE *file)
> {
> size_t idx, len;
> char *buf;
>
> for (idx=len=0, buf = NULL; !buf || buf[len-2] != '\n'; ) {
> idx = len;
> buf = xrealloc(buf, len += 80);
> buf[len-2] = '\n';
> if (!fgets(buf+idx, len-idx, file)) {
> if (!idx) {
> free(buf);
> return NULL;
> }
> break;
> }
> }
> for (; buf[idx] && buf[idx] != '\n'; idx++);
> buf[idx] = 0;
> return buf;
> }
>
> This seems to improve performance by several times, but it's still
> about 6-8x slower than gnu grep from my testing...
>
> Rich
>
> _______________________________________________
> busybox mailing list
> busybox@busybox.net
> http://busybox.net/cgi-bin/mailman/listinfo/busybox
>
>
>
["mygrep.c" (text/x-csrc)]
/*
simple grep
read file line by line
cmp with regexp
print lines that match
*/
#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Regular-expression settings that check_lines() hands to regcomp(). */
struct re_conf {
/* pattern text, passed to regcomp() as the expression to compile */
char *pattern;
/* flags passed to regcomp() as its cflags argument (e.g. REG_ICASE) */
int reflags;
};
/*
 * Strip trailing whitespace from s, in place.
 *
 * Every trailing character for which isspace() is true is overwritten
 * with '\0', so the string ends right after its last non-space character.
 * A string consisting only of whitespace becomes the empty string.
 *
 * Returns s itself.
 */
static char *del_tail_space(char *s)
{
    size_t len = strlen(s);

    /*
     * The cast matters: handing isspace() a negative plain char
     * (any byte >= 0x80 where char is signed) is undefined behaviour.
     */
    while (len > 0 && isspace((unsigned char)s[len - 1])) {
        s[--len] = '\0';
    }
    return s;
}
/*
 * Skip leading whitespace.
 *
 * Returns a pointer to the first character of s for which isspace() is
 * false; that may be the terminating NUL. The string is not modified,
 * and the result points into s, so it is only equal to s when there
 * was nothing to skip.
 */
static char *del_head_space(char *s)
{
    /*
     * Cast to unsigned char: passing a negative plain char to
     * isspace() is undefined behaviour.
     */
    while (*s != '\0' && isspace((unsigned char)*s)) {
        s++;
    }
    return s;
}
/*
 * Trim whitespace from both ends of s.
 *
 * The tail is cut first (in place), then the leading whitespace is
 * skipped. The returned pointer is whatever del_head_space() yields
 * for the tail-trimmed string.
 */
char *chop(char *s)
{
    char *tail_trimmed = del_tail_space(s);

    return del_head_space(tail_trimmed);
}
/*
 * Handle a line that was found by the matcher.
 *
 * (Original note, translated: "enter the found line into the db;
 * lines that cannot be identified are always an error". This version
 * only reports the match on stdout.)
 *
 * The first three characters of line are compared against a fixed table
 * of three-letter keywords. On the first hit "detect:<keyword>" is
 * printed and 0 is returned; if no keyword matches, 1 is returned.
 */
int process_line(char *line)
{
    static const char *const keywords[] = {
        "LID", "LCO", "TNR", "TFN", "BTT", "BTI", "ATB", "SWT", "LWP",
        "SMP", "TSO", "TSC", "SMF"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];
    size_t k;

    for (k = 0; k < count; k++) {
        if (strncmp(keywords[k], line, 3) != 0) {
            continue;
        }
        printf("detect:%s\n", keywords[k]);
        return 0;
    }
    return 1;
}
/*
 * fopen() wrapper that never returns NULL.
 *
 * A name of exactly "-" yields stdin without opening anything.
 * Otherwise the file is opened with the given mode; if that fails,
 * a message including strerror(errno) is written to stderr and the
 * process exits with status 1.
 */
FILE *xfopen(char *name,char *mode)
{
    FILE *stream;

    if (strcmp(name, "-") == 0) {
        return stdin;
    }

    stream = fopen(name, mode);
    if (stream != NULL) {
        return stream;
    }

    fprintf(stderr,"can not open %s:%s\n",name,strerror(errno));
    exit(1);
}
/*
 * Read fp line by line and report every line that matches the regular
 * expression described by reconf.
 *
 * reconf->pattern is compiled with reconf->reflags. Each matching line
 * is printed as "test: <line>" (the line still carries whatever newline
 * getline() read), then passed to process_line(); "ok" is printed when
 * process_line() returns 0 and "error" for any other result.
 *
 * Returns EXIT_SUCCESS once getline() stops delivering lines, or
 * EXIT_FAILURE (after a message on stderr) if the pattern does not
 * compile.
 */
int check_lines(FILE *fp,struct re_conf *reconf)
{
    regex_t regex;
    char *line = NULL;  /* buffer allocated and grown by getline() */
    size_t cap = 0;
    int rc;

    rc = regcomp(&regex, reconf->pattern, reconf->reflags);
    if (rc != 0) {
        char msg[256];

        regerror(rc, &regex, msg, sizeof msg);
        fprintf(stderr, "bad pattern '%s': %s\n", reconf->pattern, msg);
        return EXIT_FAILURE;
    }

    while (getline(&line, &cap, fp) >= 0) {
        if (regexec(&regex, line, 0, NULL, 0) != 0) {
            continue;
        }
        printf("test: %s", line);
        /*
         * process_line() reports failure with a non-zero value, so test
         * against 0 rather than for a negative result.
         */
        if (process_line(line) != 0) {
            printf("error\n");
        } else {
            printf("ok\n");
        }
    }

    regfree(&regex);
    free(line);         /* free(NULL) is a no-op, no guard needed */
    return EXIT_SUCCESS;
}
int main()
{
char *fname="controld.conf";
struct re_conf reconf= {
.reflags=0
};
FILE *fp;
fp=xfopen(fname,"r");
/*
REG_ICASE ignore case
REG_NEWLINE
Match-any-character operators don't match a newline
*/
reconf.pattern="^[A-Z][A-Z][A-Z]";
reconf.reflags= REG_ICASE;
check_lines(fp,&reconf);
fclose(fp);
return 0;
}
_______________________________________________
busybox mailing list
busybox@busybox.net
http://busybox.net/cgi-bin/mailman/listinfo/busybox
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic