From 1455834a50b8b6a15567e971db664fe7a6cdfaf6 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 29 Oct 2021 17:46:41 -0700 Subject: fix(unicode): emoji widths and readline now moves in a more unicode aware manner --- sys/cmd/rc/input.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'sys/cmd/rc/input.c') diff --git a/sys/cmd/rc/input.c b/sys/cmd/rc/input.c index 08363f0..5d30ccd 100644 --- a/sys/cmd/rc/input.c +++ b/sys/cmd/rc/input.c @@ -341,14 +341,16 @@ refreshsingleline(struct TerminalState *term) w = utf8·runewidth(r); buf+=n, len-=n; - pos-=w, col-=w; + pos-=w, col-=w; } assert(buf <= term->edit.buf + len); while(off+col > term->cursor.cap){ n = utf8·decodeprev(buf+len-1, &r); - len-=n, col--; + w = utf8·runewidth(r); + + len-=n, col-=w; } assert(len >= 0); @@ -510,8 +512,8 @@ insertrune(struct TerminalState *term, int n, char *c) memmove(term->edit.buf+term->edit.len+n, term->edit.buf+term->edit.len, term->edit.len-term->edit.pos); memcpy(term->edit.buf+term->edit.pos, c, n); - term->edit.pos+=n, term->edit.len+=n; - term->cursor.pos++, term->cursor.len++; + term->edit.pos += n, term->edit.len += n; + term->cursor.pos += w, term->cursor.len += w; term->edit.buf[term->edit.len] = '\0'; refreshline(term); @@ -664,15 +666,21 @@ static Position left(struct TerminalState *term, int n) { + int w, d; rune r; Position pos = CURRENT(term); char *buf = term->edit.buf + term->edit.pos; - pos.cursor = MAX(pos.cursor-n, 0); - - while(n-- > 0 && buf > term->edit.buf) + d = 0; + while(n > 0 && buf > term->edit.buf){ buf -= utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + n -= w; + d += w; + } + + pos.cursor = MAX(pos.cursor-d, 0); pos.buffer = MAX(buf-term->edit.buf, 0); return pos; } @@ -682,16 +690,22 @@ static Position right(struct TerminalState *term, int n) { + int w, d; rune r; Position pos = CURRENT(term); char *end = term->edit.buf + term->edit.len; char *buf = term->edit.buf + term->edit.pos; - pos.cursor = MIN(pos.cursor+n, term->cursor.len); - - while(n-- > 0 && buf < end) + d = 0; + while(n > 0 && buf < end){ buf += utf8·decode(buf, &r); + w = utf8·runewidth(r); + n -= w; + d += w; + } + + pos.cursor = MIN(pos.cursor+d, term->cursor.len); pos.buffer = MIN(buf-term->edit.buf, term->edit.len); return pos; } -- cgit v1.2.1 From 65e84b15a944c83862da736a427636f3e64d3fc2 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 4 Nov 2021 15:00:36 -0700 Subject: Feat: input readline now unicode aware Updated our assumptions of readline to handle valid unicode input. This required integrating with an explicit library to handle unicode knowledge. --- sys/cmd/rc/input.c | 308 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 237 insertions(+), 71 deletions(-) (limited to 'sys/cmd/rc/input.c') diff --git a/sys/cmd/rc/input.c b/sys/cmd/rc/input.c index 5d30ccd..7ec8100 100644 --- a/sys/cmd/rc/input.c +++ b/sys/cmd/rc/input.c @@ -8,7 +8,7 @@ enum { NonPrintable, Alnum, - Punctation, + Punctuation, Space }; @@ -119,6 +119,22 @@ typedef struct typedef Position (*Noun)(struct TerminalState*, int); typedef void (*Verb)(struct TerminalState*, Position); +static +int +runetype(rune r) +{ + if(r<128) + return ascii[r]; + if(utf8·isspace(r)) + return Space; + if(utf8·isdigit(r) || utf8·isalpha(r)) + return Alnum; + if(utf8·ispunct(r)) + return Punctuation; + + return NonPrintable; +} + static void normalcursor(int fd) @@ -509,7 +525,7 @@ insertrune(struct TerminalState *term, int n, char *c) } refreshline(term); }else{ - memmove(term->edit.buf+term->edit.len+n, term->edit.buf+term->edit.len, term->edit.len-term->edit.pos); + memmove(term->edit.buf+term->edit.pos+n, term->edit.buf+term->edit.pos, term->edit.len-term->edit.pos); memcpy(term->edit.buf+term->edit.pos, c, n); term->edit.pos += n, term->edit.len += n; @@ -666,8 +682,8 @@ static Position left(struct TerminalState *term, int n) { - int w, d; rune r; + int w, d; Position pos = CURRENT(term); char *buf = term->edit.buf + term->edit.pos; @@ -690,11 +706,12 @@ static Position right(struct TerminalState *term, int n) { - int w, d; rune r; + int w, d; Position pos = CURRENT(term); - char *end = term->edit.buf + term->edit.len; + char *buf = term->edit.buf + term->edit.pos; + char *end = term->edit.buf + term->edit.len; d = 0; while(n > 0 && buf < end){ @@ -710,122 +727,281 @@ right(struct TerminalState *term, int n) return pos; } -#define HOME(term) (Position){0} - -#if 0 static Position prevword(struct TerminalState *term, int n) { - int c; + rune r; + int c, w, b, d; Position pos = CURRENT(term); + char *buf = term->edit.buf + term->edit.pos; + d = 0; while(n-- > 0 && buf > term->edit.buf){ - while(buf > term->edit.buf && ascii[buf[-1]] == Space) - --buf; + eatspace: + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + if((c=runetype(r)) == Space){ + buf -= b; + d += w; + + if(buf <= term->edit.buf) + break; + + goto eatspace; + } + + eatword: + if(runetype(r) == c){ + buf -= b; + d += w; - c = ascii[buf[-1]]; - while(buf> term->edit.buf && ascii[buf[-1]] == c) - --buf; + if(buf <= term->edit.buf) + break; + + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + + goto eatword; + } } - return buf-term->edit.buf; + pos.cursor = MAX(pos.cursor-d, 0); + pos.buffer = MAX(buf-term->edit.buf, 0); + return pos; } static Position -prevWord(struct TerminalState *term, int n) +nextword(struct TerminalState *term, int n) { - char *it = term->edit.buf + term->edit.pos; + rune r; + int c, b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; + char *end = term->edit.buf + term->edit.len; + + d = 0; + while(n-- > 0 && buf < end){ + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + c = runetype(r); + eatword: + if(runetype(r) == c){ + buf += b; + d += w; + + if(buf >= end) + break; + + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + goto eatword; + } + eatspace: + while((c=runetype(r)) == Space){ + buf += b; + d += w; - while(n-- > 0 && it > term->edit.buf){ - while(it > term->edit.buf && ascii[it[-1]] == Space) - --it; + if(buf >= end) + break; - while(it > term->edit.buf && ascii[it[-1]] != Space) - --it; + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + goto eatspace; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } + static Position -nextword(struct TerminalState *term, int n) +prevWord(struct TerminalState *term, int n) { - int c; - char *it = term->edit.buf + term->edit.pos; - char *end = term->edit.buf + term->edit.len; + rune r; + int c, w, b, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; + + d = 0; + while(n-- > 0 && buf > term->edit.buf){ + eatspace: + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + if((c=runetype(r)) == Space){ + buf -= b; + d += w; + + if(buf <= term->edit.buf) + break; + + goto eatspace; + } + + eatword: + if((c=runetype(r)) != Space){ + buf -= b; + d += w; - while(n-- > 0 && it < end){ - c = ascii[*it]; - while(it < end && ascii[*it] == c) - ++it; + if(buf <= term->edit.buf) + break; - while(it < end && ascii[*it] == Space) - ++it; + b = utf8·decodeprev(buf-1, &r); + w = utf8·runewidth(r); + + goto eatword; + } } - return it-term->edit.buf; + pos.cursor = MAX(pos.cursor-d, 0); + pos.buffer = MAX(buf-term->edit.buf, 0); + return pos; } static Position nextWord(struct TerminalState *term, int n) { - char *it = term->edit.buf + term->edit.pos; + rune r; + int b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; char *end = term->edit.buf + term->edit.len; - while(n-- > 0 && it < end){ - while(it < end && ascii[*it] != Space) - ++it; + d = 0; + while(n-- > 0 && buf < end){ + eatword: + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + if(runetype(r) != Space){ + buf += b; + d += w; + + if(buf > end) + break; + + goto eatword; + } + + eatspace: + if(runetype(r) == Space){ + buf += b; + d += w; + + if(buf > end) + break; - while(it < end && ascii[*it] == Space) - ++it; + b = utf8·decode(buf, &r); + w = utf8·runewidth(r); + + goto eatspace; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } static Position nextend(struct TerminalState *term, int n) { - int c; - char *it = term->edit.buf + term->edit.pos; + rune r; + int c, b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; char *end = term->edit.buf + term->edit.len; - while(n-- > 0 && it+1 < end){ - while(it+1 < end && ascii[it[1]] == Space) - ++it; + d = 0; + while(n-- > 0 && buf+1 < end){ + eatspace: + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + while((c=runetype(r)) == Space){ + buf += b; + d += w; + + if(buf+1 >= end) + break; + + goto eatspace; + } + eatword: + if(runetype(r) == c){ + buf += b; + d += w; + + if(buf+1 >= end) + break; - c = ascii[it[1]]; - while(it+1 < end && ascii[it[1]] == c) - ++it; + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + goto eatword; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } static Position nextEnd(struct TerminalState *term, int n) { - char *it = term->edit.buf + term->edit.pos; + rune r; + int b, w, d; + Position pos = CURRENT(term); + + char *buf = term->edit.buf + term->edit.pos; char *end = term->edit.buf + term->edit.len; - while(n-- > 0 && it+1 < end){ - while(it+1 < end && ascii[it[1]] == Space) - ++it; + d = 0; + while(n-- > 0 && buf+1 < end){ + eatspace: + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + if(runetype(r) == Space){ + buf += b; + d += w; + + if(buf+1 > end) + break; + + goto eatspace; + } + + eatword: + if(runetype(r) != Space){ + buf += b; + d += w; + + if(buf+1 > end) + break; - while(it < end && ascii[it[1]] != Space) - ++it; + b = utf8·decode(buf+1, &r); + w = utf8·runewidth(r); + + goto eatword; + } } - return it-term->edit.buf; + pos.cursor = MIN(pos.cursor+d, term->cursor.len); + pos.buffer = MIN(buf-term->edit.buf, term->edit.len); + return pos; } +#define HOME(term) (Position){0} #define END(term) (Position){(term)->edit.len, (term)->cursor.len} static @@ -978,7 +1154,6 @@ action: return 0; } #undef END -#endif #define END(term) (Position){(term).edit.len, (term).cursor.len} @@ -1152,7 +1327,6 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) } } case 1: -#if 0 if(mode.vi.on){ if(mode.vi.insert){ normalmode(term.ofd); @@ -1163,7 +1337,6 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) continue; } } -#endif default: // 2 ; } @@ -1216,22 +1389,15 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) break; } } - break; default: -#if 0 - if(mode.vi.on && !mode.vi.insert){ - if(vi(&term,c) < 0){ + if(mode.vi.on && !mode.vi.insert && n == 1){ + if(vi(&term,c[0]) < 0){ term.edit.len = -1; goto finish; } - }else if(!insertchar(&term,c)){ - term.edit.len = -1; - goto finish; - } -#endif - if(!insertrune(&term, n, c)){ + }else if(!insertrune(&term,n,c)){ term.edit.len = -1; goto finish; } @@ -1266,7 +1432,7 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) break; case KeyCtrlW: /* ctrl+w, delete previous word */ - /* delete(&term, prevword(&term,1)); */ + delete(&term, prevword(&term,1)); break; } } -- cgit v1.2.1 From 43688fe7190d0350349d47727c3663421d5618dc Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Mon, 8 Nov 2021 08:46:56 -0800 Subject: feat(rc): added back functionality of prompt, now unicode aware --- sys/cmd/rc/input.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 136 insertions(+), 19 deletions(-) (limited to 'sys/cmd/rc/input.c') diff --git a/sys/cmd/rc/input.c b/sys/cmd/rc/input.c index 7ec8100..cc2383d 100644 --- a/sys/cmd/rc/input.c +++ b/sys/cmd/rc/input.c @@ -32,7 +32,8 @@ static int ascii[256] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -struct Mode { +struct Mode +{ ushort raw : 1; ushort multiline : 1; ushort mask : 1; @@ -43,9 +44,6 @@ struct Mode { } vi ; }; -static struct Mode mode; -static struct termios originalterm; - /* * the structure represents the state during line editing. * we pass this state to functions implementing specific editing functionalities @@ -82,8 +80,22 @@ struct TerminalState } yank; /* yank buffer */ intptr maxrows; /* maximum num of rows used so far (multiline mode) */ + intptr history; /* index of history we are currently editing */ }; +/* + * line history (circular buffer) + */ +struct History +{ + char **bot, **top, *entry[1024]; +}; + +/* globals */ +static struct Mode mode; +static struct History history; +static struct termios originalterm; + enum { KeyNil = 0, /* nil */ @@ -293,14 +305,120 @@ beep(void) fflush(stderr); } -/* =========================== Line editing ================================= */ +// ----------------------------------------------------------------------- +// command history + +void +inithistory(void) +{ + history.bot = history.top = history.entry; +} + +int +addhistory(char *line) +{ + char *copy; + + copy = strdup(line); + if(!copy) + return 0; + + *history.top++ = copy; + if(history.top == arrend(history.entry)) + history.top = history.entry; + + if(history.top == history.bot){ + efree(history.bot); + history.bot++; + } + + return 1; +} + +static +void +pophistory(void) +{ + if(--history.top < history.entry) + history.top = arrend(history.entry)-1; + efree(*history.top); +} + +static void refreshline(struct TerminalState *); -/* We define a very simple "append buffer" structure, that is an heap - * allocated string where we can append to. This is useful in order to +static +char ** +currenthistory(struct TerminalState *term, intptr *size) +{ + char **entry; + intptr len, head; + + if(history.top > history.bot){ + len = history.top - history.bot; + entry = history.top - term->history - 1; + }else if(history.top < history.bot){ + len = (arrend(history.entry) - history.bot) + (history.top - history.entry); + if((head=history.top - history.entry) < term->history) + entry = arrend(history.entry) - head; + else + entry = history.top - term->history - 1; + }else + return nil; + + *size = len; + return entry; +} + +static +void +usehistory(struct TerminalState *term, int d) +{ + rune r; + intptr w, len; + char *b, *e, **entry; + + if(!(entry = currenthistory(term, &len))) + return; + + efree(*entry); + *entry = strdup(term->edit.buf); + + term->history += d; + if(term->history < 0){ + term->history = 0; + return; + }else if(term->history >= len){ + term->history = len - 1; + return; + } + entry = currenthistory(term, &len); + + strncpy(term->edit.buf, *entry, term->edit.cap); + term->edit.buf[term->edit.cap-1] = 0; + + /* update cursor/buffer positions */ + term->edit.len = term->edit.pos = strlen(term->edit.buf); + for(w=0, b=term->edit.buf, e=term->edit.buf+term->edit.len; b < e; ){ + b += utf8·decode(b, &r); + w += utf8·runewidth(r); + } + term->cursor.len = term->cursor.pos = w; + + refreshline(term); +} + +// ----------------------------------------------------------------------- +// line editing + +/* + * we define a very simple "append buffer" structure, that is an heap + * allocated string where we can append to. this is useful in order to * write all the escape sequences in a buffer and flush them to the standard - * output in a single call, to avoid flickering effects. */ + * output in a single call, to avoid flickering effects. + */ -struct Buffer { +struct Buffer +{ int len; char *b; }; @@ -667,14 +785,8 @@ refresh: term->cursor.len -= diff; refreshline(term); } - /* movements */ -void -movehistory(struct TerminalState *term, int dir) -{ -} - #define CURRENT(term) (Position){ .buffer=(term)->edit.pos, .cursor=(term)->cursor.pos }; // move cursor to the left n boxes @@ -1223,6 +1335,7 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) term.cursor.cap = columns(ifd, ofd); term.maxrows = 0; + term.history = 0; term.yank.buf = nil; term.yank.cap = term.yank.len = 0; @@ -1231,6 +1344,9 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) term.edit.buf[0] = '\0'; term.edit.cap--; /* make sure there is always space for the nulterm */ + /* push current (empty) command onto history stack */ + addhistory(""); + if(write(term.ofd,prompt,term.prompt.len) == -1) return -1; @@ -1259,6 +1375,7 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) switch(r){ case KeyEnter: + pophistory(); if(mode.multiline) move(&term, END(term)); goto finish; @@ -1300,11 +1417,11 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) break; case KeyCtrlP: /* ctrl-p */ - /* TODO next history */ + usehistory(&term, +1); break; case KeyCtrlN: /* ctrl-n */ - /* TODO prev history */ + usehistory(&term, -1); break; case KeyEsc: /* escape sequence */ @@ -1358,10 +1475,10 @@ interact(int ifd, int ofd, char *buf, intptr len, char *prompt) }else{ switch(esc[1]) { case 'A': /* up */ - movehistory(&term, 1); + usehistory(&term, +1); break; case 'B': /* down */ - movehistory(&term, 0); + usehistory(&term, -1); break; case 'C': /* right */ move(&term, right(&term, 1)); -- cgit v1.2.1