From 65e84b15a944c83862da736a427636f3e64d3fc2 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 4 Nov 2021 15:00:36 -0700 Subject: Feat: input readline now unicode aware Updated our assumptions of readline to handle valid unicode input. This required integrating with an explicit library to handle unicode knowledge. --- include/libunicode.h | 2 +- sys/cmd/rc/input.c | 308 +++++++++++++++++++++++++++++++++++++++------------ sys/cmd/rc/main.c | 2 +- sys/cmd/term/term.c | 2 +- 4 files changed, 240 insertions(+), 74 deletions(-) diff --git a/include/libunicode.h b/include/libunicode.h index d6618eb..25d6dee 100644 --- a/include/libunicode.h +++ b/include/libunicode.h @@ -27,7 +27,7 @@ char *utf8·findlast(char* s, rune); // find last rune in char stream int utf8·canfit(char *, int); // XXX: odd function... -int utf8·isletter(rune r); +int utf8·isalpha(rune r); int utf8·isdigit(rune r); int utf8·isspace(rune r); int utf8·istitle(rune r); diff --git a/sys/cmd/rc/input.c b/sys/cmd/rc/input.c index 5d30ccd..7ec8100 100644 --- a/sys/cmd/rc/input.c +++ b/sys/cmd/rc/input.c @@ -8,7 +8,7 @@ enum { NonPrintable, Alnum, - Punctation, + Punctuation, Space }; @@ -119,6 +119,22 @@ typedef struct typedef Position (*Noun)(struct TerminalState*, int); typedef void (*Verb)(struct TerminalState*, Position); +static +int +runetype(rune r) +{ + if(r<128) + return ascii[r]; + if(utf8·isspace(r)) + return Space; + if(utf8·isdigit(r) || utf8·isalpha(r)) + return Alnum; + if(utf8·ispunct(r)) + return Punctuation; + + return NonPrintable; +} + static void normalcursor(int fd) @@ -509,7 +525,7 @@ insertrune(struct TerminalState *term, int n, char *c) } refreshline(term); }else{ - memmove(term->edit.buf+term->edit.len+n, term->edit.buf+term->edit.len, term->edit.len-term->edit.pos); + memmove(term->edit.buf+term->edit.pos+n, term->edit.buf+term->edit.pos, term->edit.len-term->edit.pos); memcpy(term->edit.buf+term->edit.pos, c, n); term->edit.pos += n, term->edit.len += n; @@ -666,8 +682,8 @@ static Position left(struct TerminalState *term, int n) { - int w, d; rune r; + int w, d; Position pos = CURRENT(term); char *buf = term->edit.buf + term->edit.pos; @@ -690,11 +706,12 @@ static Position right(struct TerminalState *term, int n) { - int w, d; rune r; + int w, d; Position pos = CURRENT(term); - char *end = term->edit.buf + term->edit.len; + char *buf = term->edit.buf + term->edit.pos; + char *end = term->edit.buf + term->edit.len; d = 0; while(n > 0 && buf < end){ @@ -710,122 +727,281 @@ right(struct TerminalState *term, int n) return pos; } -#define HOME(term) (Position){0} - -#if 0 static Position prevword(struct TerminalState *term, int n) { - int c; + rune r; + int c, w, b, d; Position pos = CURRENT(term); + char *buf = term->edit.buf + term->edit.pos; + d = 0; while(n-- > 0 && buf > term->edit.buf){ - while(buf > term->edit.buf && ascii[buf[-1]] == Space) - --buf; + eatspace: + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + if((c=runetype(r)) == Space){ + buf -= b; + d += w; + + if(buf <= term->edit.buf) + break; + + goto eatspace; + } + + eatword: + if(runetype(r) == c){ + buf -= b; + d += w; - c = ascii[buf[-1]]; - while(buf> term->edit.buf && ascii[buf[-1]] == c) - --buf; + if(buf <= term->edit.buf) + break; + + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + + goto eatword; + } } - return buf-term->edit.buf; + pos.cursor = MAX(pos.cursor-d, 0); + pos.buffer = MAX(buf-term->edit.buf, 0); + return pos; } static Position -prevWord(struct TerminalState *term, int n) +nextword(struct TerminalState *term, int n) { - char *it = term->edit.buf + term->edit.pos; + rune r; + int c, b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; + char *end = term->edit.buf + term->edit.len; + + d = 0; + while(n-- > 0 && buf < end){ + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + c = runetype(r); + eatword: + if(runetype(r) == c){ + buf += b; + d += w; + + if(buf >= end) + break; + + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + goto eatword; + } + eatspace: + while((c=runetype(r)) == Space){ + buf += b; + d += w; - while(n-- > 0 && it > term->edit.buf){ - while(it > term->edit.buf && ascii[it[-1]] == Space) - --it; + if(buf >= end) + break; - while(it > term->edit.buf && ascii[it[-1]] != Space) - --it; + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + goto eatspace; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } + static Position -nextword(struct TerminalState *term, int n) +prevWord(struct TerminalState *term, int n) { - int c; - char *it = term->edit.buf + term->edit.pos; - char *end = term->edit.buf + term->edit.len; + rune r; + int c, w, b, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; + + d = 0; + while(n-- > 0 && buf > term->edit.buf){ + eatspace: + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + if((c=runetype(r)) == Space){ + buf -= b; + d += w; + + if(buf <= term->edit.buf) + break; + + goto eatspace; + } + + eatword: + if((c=runetype(r)) != Space){ + buf -= b; + d += w; - while(n-- > 0 && it < end){ - c = ascii[*it]; - while(it < end && ascii[*it] == c) - ++it; + if(buf <= term->edit.buf) + break; - while(it < end && ascii[*it] == Space) - ++it; + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + + goto eatword; + } } - return it-term->edit.buf; + pos.cursor = MAX(pos.cursor-d, 0); + pos.buffer = MAX(buf-term->edit.buf, 0); + return pos; } static Position nextWord(struct TerminalState *term, int n) { - char *it = term->edit.buf + term->edit.pos; + rune r; + int b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; char *end = term->edit.buf + term->edit.len; - while(n-- > 0 && it < end){ - while(it < end && ascii[*it] != Space) - ++it; + d = 0; + while(n-- > 0 && buf < end){ + eatword: + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + if(runetype(r) != Space){ + buf += b; + d += w; + + if(buf > end) + break; + + goto eatword; + } + + eatspace: + if(runetype(r) == Space){ + buf += b; + d += w; + + if(buf > end) + break; - while(it < end && ascii[*it] == Space) - ++it; + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + + goto eatspace; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } static Position nextend(struct TerminalState *term, int n) { - int c; - char *it = term->edit.buf + term->edit.pos; + rune r; + int c, b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; char *end = term->edit.buf + term->edit.len; - while(n-- > 0 && it+1 < end){ - while(it+1 < end && ascii[it[1]] == Space) - ++it; + d = 0; + while(n-- > 0 && buf+1 < end){ + eatspace: + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + while((c=runetype(r)) == Space){ + buf += b; + d += w; + + if(buf+1 >= end) + break; + + goto eatspace; + } + eatword: + if(runetype(r) == c){ + buf += b; + d += w; + + if(buf+1 >= end) + break; - c = ascii[it[1]]; - while(it+1 < end && ascii[it[1]] == c) - ++it; + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + goto eatword; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } static Position nextEnd(struct TerminalState *term, int n) { - char *it = term->edit.buf + term->edit.pos; + rune r; + int b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; char *end = term->edit.buf + term->edit.len; - while(n-- > 0 && it+1 < end){ - while(it+1 < end && ascii[it[1]] == Space) - ++it; + d = 0; + while(n-- > 0 && buf+1 < end){ + eatspace: + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + if(runetype(r) == Space){ + buf += b; + d += w; + + if(buf+1 > end) + break; + + goto eatspace; + } + + eatword: + if(runetype(r) != Space){ + buf += b; + d += w; + + if(buf+1 > end) + break; - while(it < end && ascii[it[1]] != Space) - ++it; + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + + goto eatword; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } +#define HOME(term) (Position){0} #define END(term) (Position){(term)->edit.len, (term)->cursor.len} static @@ -978,7 +1154,6 @@ action: return 0; } #undef END -#endif #define END(term) (Position){(term).edit.len, (term).cursor.len} @@ -1152,7 +1327,6 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) } } case 1: -#if 0 if(mode.vi.on){ if(mode.vi.insert){ normalmode(term.ofd); @@ -1163,7 +1337,6 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) continue; } } -#endif default: // 2 ; } @@ -1216,22 +1389,15 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) break; } } - break; default: -#if 0 - if(mode.vi.on && !mode.vi.insert){ - if(vi(&term,c) < 0){ + if(mode.vi.on && !mode.vi.insert && n == 1){ + if(vi(&term,c[0]) < 0){ term.edit.len = -1; goto finish; } - }else if(!insertchar(&term,c)){ - term.edit.len = -1; - goto finish; - } -#endif - if(!insertrune(&term, n, c)){ + }else if(!insertrune(&term,n,c)){ term.edit.len = -1; goto finish; } @@ -1266,7 +1432,7 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) break; case KeyCtrlW: /* ctrl+w, delete previous word */ - /* delete(&term, prevword(&term,1)); */ + delete(&term, prevword(&term,1)); break; } } diff --git a/sys/cmd/rc/main.c b/sys/cmd/rc/main.c index 12f0859..865bcde 100644 --- a/sys/cmd/rc/main.c +++ b/sys/cmd/rc/main.c @@ -59,7 +59,7 @@ main(int argc, char *argv[]) initkeywords(); initshell(); - // enablevi(); + enablevi(); xboot(argc, argv); /* unreachable */ } diff --git a/sys/cmd/term/term.c b/sys/cmd/term/term.c index f92db28..8805b0b 100644 --- a/sys/cmd/term/term.c +++ b/sys/cmd/term/term.c @@ -2096,7 +2096,7 @@ tputc(rune u) } /* combining characters */ - if(!width) { + if(!width){ if(term.c.x > 0) gp = &term.line[term.c.y][term.c.x-1]; else if(term.c.y > 0) -- cgit v1.2.1