aboutsummaryrefslogtreecommitdiff
path: root/sys/cmd/rc/glob.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cmd/rc/glob.c')
-rw-r--r--sys/cmd/rc/glob.c412
1 files changed, 240 insertions, 172 deletions
diff --git a/sys/cmd/rc/glob.c b/sys/cmd/rc/glob.c
index 95b2ef3..baea44d 100644
--- a/sys/cmd/rc/glob.c
+++ b/sys/cmd/rc/glob.c
@@ -1,199 +1,267 @@
#include "rc.h"
-#include <dirent.h>
-
-static Word *matches;
-static char buffer[6*1024];
-
-// -----------------------------------------------------------------------
-// main exports
+#include "exec.h"
+#include "fns.h"
+char *globname;
+struct word *globv;
+/*
+ * delete all the GLOB marks from s, in place
+ */
void
-unglob(char *s)
+deglob(char *s) /* s is a NUL-terminated string, rewritten in place; it never grows */
{
- char *t = s;
- do {
- if(*t==GLOB)
- t++;
- *s++ = *t;
- } while(*t++);
+ char *t = s; /* t is the read cursor, s the write cursor */
+ do{
+ if(*t==GLOB)
+ t++; /* drop the mark itself; the byte after it is copied unexamined */
+ *s++=*t;
+ }while(*t++); /* the terminating NUL is copied before the loop ends */
}
-/*
- * inspiration from rsc's blog post
- * modified for utf8 sequences and character classes
- * returns 1 if string matches pattern is found, 0 otherwise
- */
-static
int
-match(char *s, char *p)
+globcmp(const void *s, const void *t) /* qsort comparator: s and t each point at a char* element */
{
- int c, ns, np;
- rune sr, pr, lo, tr, hi;
- char *sb = s, *ss = s, *pp = p;
- while(*s || *p){
- if(*p){
- ns = utf8·bytetorune(&sr, s);
- np = utf8·bytetorune(&pr, p);
-
- if(pr==GLOB){
- np = utf8·bytetorune(&pr, ++p);
- switch(pr){
- case '?': /* single match */
- if(*s){
- p+=np, s+=ns;
- continue;
- }
- case '[': /* class match */
- np = utf8·bytetorune(&pr, ++p);
- if((c = (pr == '~')))
- np = utf8·bytetorune(&pr, ++p);
-
- lo = pr;
- while(lo != ']' && *p){
- utf8·bytetorune(&tr, p+np); /* peek ahead */
- if(tr != '-')
- hi = lo;
- else {
- p += np + 1, np = utf8·bytetorune(&hi, p);
- if(!hi) /* we hit a syntax error */
- return 0;
- if(hi < lo)
- tr = hi, hi = lo, lo = tr;
- }
- if(c ^ (lo<=sr && sr<= hi))
- goto match;
- p += np, np = utf8·bytetorune(&lo, p);
- }
- return 0;
- match:
- while (*p++ != ']' && *p); /* just iterate byte-wise */
- s += ns;
- continue;
- case '*': /* zero-or-more match */
- pp = p-1, ss = s+ns;
- p++;
- continue;
- case GLOB:
- if (sr != GLOB)
- return 0;
- s++, p++;
- continue;
- default:
- panic("unrecognized glob operation", pr);
- }
- }
-
- if (sr==pr){
- s+=ns, p+=np;
- continue;
- }
- }
- /* hit end of pattern with no match, restart at last star */
- if (ss > sb) {
- if (!*ss) /* hit end of string while matching a star */
- return 1;
-
- s = ss, p = pp;
- continue;
- }
- /* mismatch */
- return 0;
- }
- return 1;
+ return strcmp(*(char**)s, *(char**)t); /* order the two strings bytewise, as strcmp does */
}
-static
void
-globdir(char *p, char *path, int fd)
+globsort(word *left, word *right) /* sorts the strings held by the nodes from left up to, not including, right */
{
- DIR *d = nil;
- char *g; /* pattern offset (base of new GLOB) */
- char *b; /* pointer into path */
- int i, j;
- struct dirent *e;
-
- if(!*p) {
- printf("making path %s\n", path);
- matches = newword(buffer, matches);
- return;
- }
-
- if((fd = openat(fd, path[0]?path:".", O_RDONLY|O_CLOEXEC|O_DIRECTORY)) < 0)
- return;
- d = fdopendir(fd);
-
- for(g = p, b = path; *g; b++) {
- if(*g==GLOB)
- break;
- *b=*g++;
- if(*b == '/') {
- *b = 0;
- /* open new directory (close if we have opened another already */
- if ((fd = openat(fd, path, O_RDONLY|O_CLOEXEC|O_DIRECTORY)) < 0)
- goto cleanup;
- closedir(d);
- d = fdopendir(fd);
- *b = '/';
- path = b, p = g;
- }
- }
+ char **list; /* scratch array of the nodes' string pointers */
+ word *a;
+ int n = 0;
+ for(a = left;a!=right;a = a->next) n++; /* count the nodes in the range */
+ list = (char **)emalloc(n*sizeof(char *));
+ for(a = left,n = 0;a!=right;a = a->next,n++) list[n] = a->word; /* gather the strings */
+ qsort((void *)list, n, sizeof(void *), globcmp); /* globcmp orders by strcmp */
+ for(a = left,n = 0;a!=right;a = a->next,n++) a->word = list[n]; /* store them back in sorted order; the links are not touched */
+ efree((char *)list);
+}
+/*
+ * Push names prefixed by globname and suffixed by a match of p onto globv.
+ * namep points to the end of the prefix in globname.
+ */
- /* if we are at the end of the pattern, check if name exists */
- if(!*g) {
- *b = 0;
- if(faccessat(fd, path, F_OK, AT_SYMLINK_NOFOLLOW) == 0)
- matches = newword(buffer, matches);
- goto cleanup;
- }
+void
+globdir(char *p, char *namep)
+{
+	char *out, *pat, *end;
+	int dir;
+
+	/*
+	 * p is the part of the pattern still to be matched; namep is the
+	 * position inside globname where the next piece of the name goes.
+	 */
+	if(*p=='\0'){
+		/* pattern used up: globname as it stands is a result */
+		globv = newword(globname, globv);
+		return;
+	}
+
+	/*
+	 * Copy literal bytes up to the first GLOB mark.  Every '/' copied
+	 * closes a component, so p and namep move up to just past it.
+	 */
+	out = namep;
+	for(pat = p; *pat!='\0' && *pat!=GLOB; ){
+		*out = *pat++;
+		if(*out++=='/'){
+			namep = out;
+			p = pat;
+		}
+	}
+
+	if(*pat=='\0'){
+		/* the rest was purely literal: keep the name only if access() succeeds */
+		*out = '\0';
+		if(access(globname, 0)==0)
+			globv = newword(globname, globv);
+		return;
+	}
+
+	/* a GLOB mark remains: list the directory named so far ("." if none) */
+	*namep = '\0';
+	dir = Opendir(globname[0]?globname:".");
+	if(dir<0)
+		return;
+	/* move pat to the end of the component that holds the mark */
+	while(*pat!='/' && *pat!='\0')
+		pat++;
+	/*
+	 * Readdir is defined elsewhere; it appears to store each entry name
+	 * at namep, and its last argument is presumably a directories-only
+	 * flag -- confirm against its definition.
+	 */
+	while(Readdir(dir, namep, *pat=='/')){
+		if(!matchfn(namep, p))
+			continue;
+		/* continue with the remaining pattern, appending after this entry */
+		for(end = namep; *end!='\0'; end++)
+			;
+		globdir(pat, end);
+	}
+	Closedir(dir);
+}
+/*
+ * Push all file names matched by p onto globv, in sorted order.
+ * If there are no matches, p itself (with its GLOB marks removed) is pushed instead.
+ */
- /* we have a non-trivial pattern to match */
- /* partition on the next directory */
- while(*g && *g!='/')
- g++;
+void
+glob(char *p)
+{
+	word *before = globv;
+	int size = Globsize(p);
+
+	/* Globsize is defined elsewhere; a zero result means no search is made */
+	if(size!=0){
+		globname = emalloc(size);
+		globname[0] = '\0';
+		globdir(p, globname);
+		efree(globname);
+		if(globv!=before){
+			/* globdir added words ahead of `before`: sort just those */
+			globsort(globv, before);
+			return;
+		}
+	}
+	/* no search or no match: push p itself, minus its GLOB marks */
+	deglob(p);
+	globv = newword(p, globv);
+}
+/*
+ * Do p and q point at equal utf codes
+ */
- if(*g){
- j = 1;
- *g = 0;
- } else
- j = 0;
+int
+equtf(char *p, char *q)
+{
+	int len, i;
+
+	/* differing lead bytes can never be the same code */
+	if(*p!=*q)
+		return 0;
+
+	/* the lead byte tells how many bytes the code should occupy */
+	if(twobyte(*p))
+		len = 2;
+	else if(threebyte(*p))
+		len = 3;
+	else if(fourbyte(*p))
+		len = 4;
+	else
+		len = 1;
+
+	/*
+	 * Compare the trailing bytes.  A nul that both strings share cuts
+	 * the code short at the end of the string; that counts as equal
+	 * and nothing beyond the nul is examined.
+	 */
+	for(i = 1; i<len; i++){
+		if(p[i]!=q[i])
+			return 0;
+		if(p[i]=='\0')
+			return 1;
+	}
+	return 1;
+}
+/*
+ * Return a pointer to the next utf code in the string,
+ * not jumping past nuls in broken utf codes!
+ */
- while((e = readdir(d))) {
- if (e->d_name[0] == '.')
- if (e->d_name[1] == 0 || /* . */
- (e->d_name[1] == '.' && e->d_name[2] == 0)) /* .. */
- continue;
+char*
+nextutf(char *p)
+{
+	int len, i;
+
+	/* nominal length of the code, taken from its lead byte */
+	if(twobyte(*p))
+		len = 2;
+	else if(threebyte(*p))
+		len = 3;
+	else if(fourbyte(*p))
+		len = 4;
+	else
+		len = 1;
+
+	/*
+	 * The lead byte is always stepped over; after that, stop early at
+	 * a nul so a broken code cannot carry the cursor past the end.
+	 */
+	for(i = 1; i<len && p[i]!='\0'; i++)
+		;
+	return p+i;
+}
+/*
+ * Convert the utf code at *p to a unicode value
+ */
- for(i=0;e->d_name[i];i++)
- b[i]=e->d_name[i];
- b[i]=0;
+int
+unicode(char *p)
+{
+	int u = *p&0xff;
+	int more, i;
+
+	/*
+	 * Decode the utf code at p and return its code point.
+	 * The lead byte fixes how many continuation bytes follow.  Only
+	 * its payload bits (5, 4 or 3 of them) belong in the result, so
+	 * the length-marker bits are masked off before anything is
+	 * shifted in.  A byte that starts no multi-byte code is returned
+	 * as is.
+	 */
+	if(twobyte(u)){
+		u &= 0x1f;
+		more = 1;
+	}else if(threebyte(u)){
+		u &= 0x0f;
+		more = 2;
+	}else if(fourbyte(u)){
+		u &= 0x07;
+		more = 3;
+	}else
+		return u;
+
+	for(i = 1; i<=more; i++){
+		/*
+		 * A nul inside the code means it is cut off by the end of
+		 * the string.  Read nothing beyond the nul (nextutf stops at
+		 * the same place) and let the missing bytes count as zero.
+		 */
+		if(p[i]=='\0'){
+			u <<= 6*(more-i+1);
+			break;
+		}
+		u = (u<<6)|(p[i]&0x3f);
+	}
+	return u;
+}
+/*
+ * Does the string s match the pattern p
+ * . and .. are only matched by patterns starting with .
+ * * matches any sequence of characters
+ * ? matches any single character
+ * [...] matches the enclosed list of characters
+ */
- if(match(path, p))
- globdir(g+j, b, fd);
- }
+int
+matchfn(char *s, char *p)
+{
+	/*
+	 * The entries "." and ".." are refused unless the pattern itself
+	 * begins with a literal '.'; everything else is left to match(),
+	 * with '/' ending the pattern component.
+	 */
+	if(s[0]=='.' && p[0]!='.'){
+		if(s[1]=='\0')
+			return 0;
+		if(s[1]=='.' && s[2]=='\0')
+			return 0;
+	}
+	return match(s, p, '/');
+}
- printf("successful\n");
-cleanup:
- printf("cleaning up\n");
- /* NOTE: a successful closedir also closes the file descriptor */
- closedir(d);
- return;
+int
+match(char *s, char *p, int stop) /* returns 1 if s matches p, else 0; p ends at a NUL or at the byte `stop` */
+{
+ int compl, hit, lo, hi, t, c;
+ for(;*p!=stop && *p!='\0';s = nextutf(s),p = nextutf(p)){ /* both cursors advance one utf code per pass */
+ if(*p!=GLOB){
+ if(!equtf(p, s)) return 0; /* unmarked pattern code: must equal the subject code */
+ }
+ else switch(*++p){ /* the byte after the GLOB mark selects the operator */
+ case GLOB: /* a marked GLOB byte stands for a GLOB byte in the subject */
+ if(*s!=GLOB)
+ return 0;
+ break;
+ case '*':
+ for(;;){ /* try the rest of the pattern at s, then at each later code, ending with the end of s */
+ if(match(s, nextutf(p), stop)) return 1;
+ if(!*s)
+ break;
+ s = nextutf(s);
+ }
+ return 0;
+ case '?': /* any one code, but not the end of the subject */
+ if(*s=='\0')
+ return 0;
+ break;
+ case '[':
+ if(*s=='\0')
+ return 0;
+ c = unicode(s); /* the subject code tested against the class */
+ p++;
+ compl=*p=='~'; /* a leading ~ complements the class */
+ if(compl)
+ p++;
+ hit = 0;
+ while(*p!=']'){ /* each pass consumes one member: a single code or a lo-hi range */
+ if(*p=='\0')
+ return 0; /* syntax error */
+ lo = unicode(p);
+ p = nextutf(p);
+ if(*p!='-')
+ hi = lo;
+ else{
+ p++;
+ if(*p=='\0')
+ return 0; /* syntax error */
+ hi = unicode(p);
+ p = nextutf(p);
+ if(hi<lo){ t = lo; lo = hi; hi = t; } /* reversed bounds are swapped */
+ }
+ if(lo<=c && c<=hi)
+ hit = 1; /* no early exit: the scan continues to the closing ] */
+ }
+ if(compl)
+ hit=!hit;
+ if(!hit)
+ return 0;
+ break; /* p rests on the ] here; the loop increment steps past it */
+ }
+ }
+ return *s=='\0'; /* pattern used up: a match only if the subject is used up too */
}
void
-glob(char *p)
+globlist1(word *gl) /* calls glob() on every word of the list gl, last word first */
{
- char *path = buffer;
-
- globdir(p, path, AT_FDCWD);
+ if(gl){
+ globlist1(gl->next); /* recurse to the tail before handling this word */
+ glob(gl->word);
+ }
}
-#if 0
-int
-main()
+void
+globlist(void) /* globs every word of runq->argv->words and installs the results in a new list */
{
- errio = openfd(2);
- glob("\x01*");
- pval(errio, matches);
- flush(&errio);
+ word *a;
+ globv = 0; /* glob() accumulates its results in globv */
+ globlist1(runq->argv->words);
+ poplist(); /* NOTE(review): poplist and pushlist are defined elsewhere; presumably they swap the old list for a fresh one -- confirm */
+ pushlist();
+ if(globv){
+ for(a = globv;a->next;a = a->next); /* find the last node of globv */
+ a->next = runq->argv->words; /* chain the current words after the globbed ones */
+ runq->argv->words = globv;
+ }
}
-#endif