From d1a19f0d477a6249d8af9322317b8434b86260ea Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 13 Oct 2021 08:27:37 -0700 Subject: fix(email): bytes error. updated vendoring --- sys/cmd/rc/glob.c | 412 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 240 insertions(+), 172 deletions(-) (limited to 'sys/cmd/rc/glob.c') diff --git a/sys/cmd/rc/glob.c b/sys/cmd/rc/glob.c index 95b2ef3..baea44d 100644 --- a/sys/cmd/rc/glob.c +++ b/sys/cmd/rc/glob.c @@ -1,199 +1,267 @@ #include "rc.h" -#include - -static Word *matches; -static char buffer[6*1024]; - -// ----------------------------------------------------------------------- -// main exports +#include "exec.h" +#include "fns.h" +char *globname; +struct word *globv; +/* + * delete all the GLOB marks from s, in place + */ void -unglob(char *s) +deglob(char *s) { - char *t = s; - do { - if(*t==GLOB) - t++; - *s++ = *t; - } while(*t++); + char *t = s; + do{ + if(*t==GLOB) + t++; + *s++=*t; + }while(*t++); } -/* - * inspiration from rsc's blog post - * modified for utf8 sequences and character classes - * returns 1 if string matches pattern is found, 0 otherwise - */ -static int -match(char *s, char *p) +globcmp(const void *s, const void *t) { - int c, ns, np; - rune sr, pr, lo, tr, hi; - char *sb = s, *ss = s, *pp = p; - while(*s || *p){ - if(*p){ - ns = utf8·bytetorune(&sr, s); - np = utf8·bytetorune(&pr, p); - - if(pr==GLOB){ - np = utf8·bytetorune(&pr, ++p); - switch(pr){ - case '?': /* single match */ - if(*s){ - p+=np, s+=ns; - continue; - } - case '[': /* class match */ - np = utf8·bytetorune(&pr, ++p); - if((c = (pr == '~'))) - np = utf8·bytetorune(&pr, ++p); - - lo = pr; - while(lo != ']' && *p){ - utf8·bytetorune(&tr, p+np); /* peek ahead */ - if(tr != '-') - hi = lo; - else { - p += np + 1, np = utf8·bytetorune(&hi, p); - if(!hi) /* we hit a syntax error */ - return 0; - if(hi < lo) - tr = hi, hi = lo, lo = tr; - } - if(c ^ (lo<=sr && sr<= hi)) - goto match; - p += np, np = utf8·bytetorune(&lo, p); - } - return 0; - match: - while (*p++ != ']' && *p); /* just iterate byte-wise */ - s += ns; - continue; - case '*': /* zero-or-more match */ - pp = p-1, ss = s+ns; - p++; - continue; - case GLOB: - if (sr != GLOB) - return 0; - s++, p++; - continue; - default: - panic("unrecognized glob operation", pr); - } - } - - if (sr==pr){ - s+=ns, p+=np; - continue; - } - } - /* hit end of pattern with no match, restart at last star */ - if (ss > sb) { - if (!*ss) /* hit end of string while matching a star */ - return 1; - - s = ss, p = pp; - continue; - } - /* mismatch */ - return 0; - } - return 1; + return strcmp(*(char**)s, *(char**)t); } -static void -globdir(char *p, char *path, int fd) +globsort(word *left, word *right) { - DIR *d = nil; - char *g; /* pattern offset (base of new GLOB) */ - char *b; /* pointer into path */ - int i, j; - struct dirent *e; - - if(!*p) { - printf("making path %s\n", path); - matches = newword(buffer, matches); - return; - } - - if((fd = openat(fd, path[0]?path:".", O_RDONLY|O_CLOEXEC|O_DIRECTORY)) < 0) - return; - d = fdopendir(fd); - - for(g = p, b = path; *g; b++) { - if(*g==GLOB) - break; - *b=*g++; - if(*b == '/') { - *b = 0; - /* open new directory (close if we have opened another already */ - if ((fd = openat(fd, path, O_RDONLY|O_CLOEXEC|O_DIRECTORY)) < 0) - goto cleanup; - closedir(d); - d = fdopendir(fd); - *b = '/'; - path = b, p = g; - } - } + char **list; + word *a; + int n = 0; + for(a = left;a!=right;a = a->next) n++; + list = (char **)emalloc(n*sizeof(char *)); + for(a = left,n = 0;a!=right;a = a->next,n++) list[n] = a->word; + qsort((void *)list, n, sizeof(void *), globcmp); + for(a = left,n = 0;a!=right;a = a->next,n++) a->word = list[n]; + efree((char *)list); +} +/* + * Push names prefixed by globname and suffixed by a match of p onto the astack. + * namep points to the end of the prefix in globname. + */ - /* if we are at the end of the pattern, check if name exists */ - if(!*g) { - *b = 0; - if(faccessat(fd, path, F_OK, AT_SYMLINK_NOFOLLOW) == 0) - matches = newword(buffer, matches); - goto cleanup; - } +void +globdir(char *p, char *namep) +{ + char *t, *newp; + int f; + /* scan the pattern looking for a component with a metacharacter in it */ + if(*p=='\0'){ + globv = newword(globname, globv); + return; + } + t = namep; + newp = p; + while(*newp){ + if(*newp==GLOB) + break; + *t=*newp++; + if(*t++=='/'){ + namep = t; + p = newp; + } + } + /* If we ran out of pattern, append the name if accessible */ + if(*newp=='\0'){ + *t='\0'; + if(access(globname, 0)==0) + globv = newword(globname, globv); + return; + } + /* read the directory and recur for any entry that matches */ + *namep='\0'; + if((f = Opendir(globname[0]?globname:"."))<0) return; + while(*newp!='/' && *newp!='\0') newp++; + while(Readdir(f, namep, *newp=='/')){ + if(matchfn(namep, p)){ + for(t = namep;*t;t++); + globdir(newp, t); + } + } + Closedir(f); +} +/* + * Push all file names matched by p on the current thread's stack. + * If there are no matches, the list consists of p. + */ - /* we have a non-trivial pattern to match */ - /* partition on the next directory */ - while(*g && *g!='/') - g++; +void +glob(char *p) +{ + word *svglobv = globv; + int globlen = Globsize(p); + if(!globlen){ + deglob(p); + globv = newword(p, globv); + return; + } + globname = emalloc(globlen); + globname[0]='\0'; + globdir(p, globname); + efree(globname); + if(svglobv==globv){ + deglob(p); + globv = newword(p, globv); + } + else + globsort(globv, svglobv); +} +/* + * Do p and q point at equal utf codes + */ - if(*g){ - j = 1; - *g = 0; - } else - j = 0; +int +equtf(char *p, char *q) +{ + if(*p!=*q) + return 0; + if(twobyte(*p)) return p[1]==q[1]; + if(threebyte(*p)){ + if(p[1]!=q[1]) + return 0; + if(p[1]=='\0') + return 1; /* broken code at end of string! */ + return p[2]==q[2]; + } + if(fourbyte(*p)){ + if(p[1]!=q[1]) + return 0; + if(p[1]=='\0') + return 1; + if(p[2]!=q[2]) + return 0; + if(p[2]=='\0') + return 1; + return p[3]==q[3]; + } + return 1; +} +/* + * Return a pointer to the next utf code in the string, + * not jumping past nuls in broken utf codes! + */ - while((e = readdir(d))) { - if (e->d_name[0] == '.') - if (e->d_name[1] == 0 || /* . */ - (e->d_name[1] == '.' && e->d_name[2] == 0)) /* .. */ - continue; +char* +nextutf(char *p) +{ + if(twobyte(*p)) return p[1]=='\0'?p+1:p+2; + if(threebyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p+3; + if(fourbyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p[3]=='\0'?p+3:p+4; + return p+1; +} +/* + * Convert the utf code at *p to a unicode value + */ - for(i=0;e->d_name[i];i++) - b[i]=e->d_name[i]; - b[i]=0; +int +unicode(char *p) +{ + int u=*p&0xff; + if(twobyte(u)) return ((u&0x1f)<<6)|(p[1]&0x3f); + if(threebyte(u)) return (u<<12)|((p[1]&0x3f)<<6)|(p[2]&0x3f); + if(fourbyte(u)) return (u<<18)|((p[1]&0x3f)<<12)|((p[2]&0x3f)<<6)|(p[3]&0x3f); + return u; +} +/* + * Does the string s match the pattern p + * . and .. are only matched by patterns starting with . + * * matches any sequence of characters + * ? matches any single character + * [...] matches the enclosed list of characters + */ - if(match(path, p)) - globdir(g+j, b, fd); - } +int +matchfn(char *s, char *p) +{ + if(s[0]=='.' && (s[1]=='\0' || s[1]=='.' && s[2]=='\0') && p[0]!='.') + return 0; + return match(s, p, '/'); +} - printf("successful\n"); -cleanup: - printf("cleaning up\n"); - /* NOTE: a successful closedir also closes the file descriptor */ - closedir(d); - return; +int +match(char *s, char *p, int stop) +{ + int compl, hit, lo, hi, t, c; + for(;*p!=stop && *p!='\0';s = nextutf(s),p = nextutf(p)){ + if(*p!=GLOB){ + if(!equtf(p, s)) return 0; + } + else switch(*++p){ + case GLOB: + if(*s!=GLOB) + return 0; + break; + case '*': + for(;;){ + if(match(s, nextutf(p), stop)) return 1; + if(!*s) + break; + s = nextutf(s); + } + return 0; + case '?': + if(*s=='\0') + return 0; + break; + case '[': + if(*s=='\0') + return 0; + c = unicode(s); + p++; + compl=*p=='~'; + if(compl) + p++; + hit = 0; + while(*p!=']'){ + if(*p=='\0') + return 0; /* syntax error */ + lo = unicode(p); + p = nextutf(p); + if(*p!='-') + hi = lo; + else{ + p++; + if(*p=='\0') + return 0; /* syntax error */ + hi = unicode(p); + p = nextutf(p); + if(hinext); + glob(gl->word); + } } -#if 0 -int -main() +void +globlist(void) { - errio = openfd(2); - glob("\x01*"); - pval(errio, matches); - flush(&errio); + word *a; + globv = 0; + globlist1(runq->argv->words); + poplist(); + pushlist(); + if(globv){ + for(a = globv;a->next;a = a->next); + a->next = runq->argv->words; + runq->argv->words = globv; + } } -#endif -- cgit v1.2.1