/* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */ /* multipattern matcher */ #include #include #include #define MAXPAT 256 #define MAXLINE 1024 #define MAXSYM 256 #define MAXMEMBER1 4096 #define MAXPATFILE 260000 #define BLOCKSIZE 8192 #define MAXHASH 8192 #define mm 8191 #define max_num 30000 #define W_DELIM 128 #define L_DELIM 10 extern COUNT, FNAME, SILENT, FILENAMEONLY, num_of_matched; extern INVERSE; extern WORDBOUND, WHOLELINE, NOUPPER; extern unsigned char CurrentFileName[], Progname[]; extern total_line; int LONG = 0; int SHORT = 0; int p_size=0; unsigned char SHIFT1[MAXMEMBER1]; unsigned char tr[MAXSYM]; unsigned char tr1[MAXSYM]; struct pat_list { int index; struct pat_list *next; } *HASH[MAXHASH]; struct pat_list *pt, *qt; unsigned char buf[MAXPATFILE+BLOCKSIZE]; unsigned char pat_spool[MAXPATFILE+2*max_num+MAXPAT]; unsigned char *patt[max_num]; unsigned char pat_len[max_num]; prepf(fp) int fp; { int length=0, i, p=1, pdx=0, num_pat; unsigned char *pat_ptr=pat_spool, temp[10]; unsigned Mask = 15; int num_read; while((num_read = read(fp, buf+length, BLOCKSIZE)) > 0) { length = length + num_read; if(length > MAXPATFILE) { fprintf(stderr, "%s: maximum pattern file size is %d\n", Progname, MAXPATFILE); exit(2); } } buf[length] = '\n'; i = 0; p=1; while(imax_num) { fprintf(stderr, "%s: maximum number of patterns is %d\n", Progname, max_num); exit(2); } for(i=1; i<20; i++) *pat_ptr = i; /* boundary safety zone */ for(i=0; i< MAXSYM; i++) tr[i] = i; if(NOUPPER) { for(i='A'; i<= 'Z'; i++) tr[i] = i + 'a' - 'A'; } if(WORDBOUND) { for(i=0; i<128; i++) if(!isalnum(i)) tr[i] = W_DELIM; } for(i=0; i< MAXSYM; i++) tr1[i] = tr[i]&Mask; num_pat = p-1; p_size = MAXPAT; for(i=1 ; i <= num_pat; i++) { p = strlen(patt[i]); pat_len[i] = p; if(p!=0 && p < p_size) p_size = p; } if(p_size == 0) { fprintf(stderr, "%s: the pattern file is empty\n", Progname); exit(2); } if(length > 400 && p_size > 2) LONG = 1; if(p_size == 1) SHORT = 1; for(i=0; i 0) { if(INVERSE && COUNT) countline(text+MAXLINE, num_read); buf_end = end = MAXLINE + num_read -1 ; while(text[end] != r_newline && end > MAXLINE) end--; residue = buf_end - end + 1 ; text[start-1] = r_newline; if(SHORT) m_short(text, start, end); else monkey1(text, start, end); if(FILENAMEONLY && num_of_matched) { printf("%s\n", CurrentFileName); return; } start = MAXLINE - residue; if(start < 0) { start = 1; } strncpy(text+start, text+end, residue); } /* end of while(num_read = ... */ text[MAXLINE] = '\n'; text[start-1] = '\n'; if(residue > 1) { if(SHORT) m_short(text, start, end); else monkey1(text, start, end); } return; } /* end mgrep */ countline(text, len) unsigned char *text; int len; { int i; for (i=0; iindex; p = p->next; qx = text-m1; j = 0; while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++; if (j > m1 ) { if(pat_len[pat_index] <= j) { if(text > textend) return; num_of_matched++; if(FILENAMEONLY || SILENT) return; MATCHED=1; if(COUNT) { while (*text != '\n') text++; } else { if(!INVERSE) { if(FNAME) printf("%s: ",CurrentFileName); while(*(--text) != '\n'); while(*(++text) != '\n') putchar(*text); printf("\n"); } else { if(FNAME) printf("%s: ",CurrentFileName); while(*(--text) != '\n'); if(lastout < text) OUT=1; while(lastout < text) putchar(*lastout++); if(OUT) { putchar('\n'); OUT=0; } while(*(++text) != '\n'); lastout=text+1; } } /* else { if(FNAME) printf("%s: ",CurrentFileName); while(*(--text) != '\n'); while(*(++text) != '\n') putchar(*text); printf("\n"); } */ } } if(MATCHED) break; } if(!MATCHED) shift = 1; else { MATCHED = 0; shift = m1; } } text = text + shift; } if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++); } m_short( text, start, end ) int start, end; register unsigned char *text; { register unsigned char *textend; register unsigned i; register int j; register struct pat_list *p, *q; register int pat_index; int MATCHED=0; int OUT=0; unsigned char *lastout; unsigned char *qx; textend = text + end; lastout = text + start + 1; text = text + start - 1 ; while (++text <= textend) { p = HASH[*text]; while(p != 0) { pat_index = p->index; p = p->next; qx = text; j = 0; while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++; if(pat_len[pat_index] <= j) { if(text >= textend) return; num_of_matched++; if(FILENAMEONLY || SILENT) return; if(COUNT) { while (*text != '\n') text++; } else { if(FNAME) printf("%s: ",CurrentFileName); if(!INVERSE) { while(*(--text) != '\n'); while(*(++text) != '\n') putchar(*text); printf("\n"); MATCHED = 1; } else { while(*(--text) != '\n'); if(lastout < text) OUT=1; while(lastout < text) putchar(*lastout++); if(OUT) { putchar('\n'); OUT=0; } while(*(++text) != '\n'); lastout=text+1; MATCHED = 1; } } } if(MATCHED) break; } MATCHED = 0; } /* while */ if(INVERSE && !COUNT) while(lastout <= textend) putchar(*lastout++); } f_prep(pat_index, Pattern) unsigned char *Pattern ; int pat_index; { int i, j, m; register unsigned hash, Mask=15; m = p_size; for (i = m-1; i>=(1+LONG); i--) { hash = (Pattern[i] & Mask); hash = (hash << 4) + (Pattern[i-1]& Mask); if(LONG) hash = (hash << 4) + (Pattern[i-2] & Mask); if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i; } if(SHORT) Mask = 255; /* 011111111 */ hash = 0; for(i = m-1; i>=0; i--) { hash = (hash << 4) + (tr[Pattern[i]]&Mask); } /* if(INVERSE) hash = Pattern[1]; */ hash=hash&mm; qt = (struct pat_list *) malloc(sizeof(struct pat_list)); qt->index = pat_index; pt = HASH[hash]; qt->next = pt; HASH[hash] = qt; }