Discussion:
[PATCH v2] add option to filter control characters on paste
Alexander Sergeyev
2018-04-19 08:41:28 UTC
Permalink
---
src/optinc.h | 1 +
src/rsinc.h | 1 +
src/screen.C | 63 ++++++++++++++++++++++++++++++++++++++++++++-----
src/xdefaults.C | 1 +
4 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/src/optinc.h b/src/optinc.h
index 09f9a26..befc68a 100644
--- a/src/optinc.h
+++ b/src/optinc.h
@@ -29,6 +29,7 @@
def(secondaryScroll)
def(pastableTabs)
def(cursorUnderline)
+ def(filterPastedControls)
#if ENABLE_FRILLS
def(insecure) // insecure esc sequences
def(borderLess) // mwm borderless hints
diff --git a/src/rsinc.h b/src/rsinc.h
index 86d0dfe..0b40f62 100644
--- a/src/rsinc.h
+++ b/src/rsinc.h
@@ -67,6 +67,7 @@
#if XFT
def (buffered)
#endif
+ def(filterPastedControls)
#if ENABLE_FRILLS
def (depth)
def (visual)
diff --git a/src/screen.C b/src/screen.C
index 9eb375a..2cdc4b6 100644
--- a/src/screen.C
+++ b/src/screen.C
@@ -2703,15 +2703,66 @@ rxvt_term::selection_changed () NOTHROW
void
rxvt_term::tt_paste (char *data, unsigned int len) NOTHROW
{
- /* convert normal newline chars into common keyboard Return key sequence */
- for (unsigned int i = 0; i < len; i++)
- if (data[i] == C0_LF)
- data[i] = C0_CR;
-
if (priv_modes & PrivMode_BracketPaste)
tt_printf ("\x1b[200~");

- tt_write (data, len);
+ if (option(Opt_filterPastedControls)) {
+ /* prepare lookup table for unwanted characters */
+ char lookup[256];
+ memset(lookup, 0, sizeof(lookup));
+ char drop[] = "\x01\x02\x03\x04\x05\x06\x07\x08\x0a\x0b\x0c\x0e\x0f\x10\x11"
+ "\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\xc2";
+ for (unsigned int i = sizeof(drop); i-- > 0u;) {
+ lookup[(unsigned char) drop[i]] = 1;
+ }
+
+ /* skip the first run of good characters; no change is needed there */
+ char * rptr = data;
+ char * end = data + len;
+ while (rptr < end && lookup[(unsigned char) *rptr] == 0) {
+ ++rptr;
+ }
+ /* encountered a bad symbol (or the end of buffer); from this point copy byte
+ by byte; we won't copy by segments since they are likely to overlap */
+ char * wptr = rptr;
+ while (rptr < end) {
+ /* consume the current character and process it */
+ switch (*rptr++) {
+ /* convert newline chars into common keyboard Return key sequence */
+ case '\x0a':
+ *wptr++ = '\x0d';
+ break;
+ /* filter pairs C2-80 to C2-9F which are UTF8 sequences for U+0080 to U+009F
+ codepoints (C1 Controls and Latin-1 Supplement) */
+ case '\xc2':
+ if (rptr < end) {
+ const unsigned char v = (unsigned char) *rptr++;
+ if (v < (unsigned char) '\x80' || v > (unsigned char) '\x9f') {
+ /* not a bad pair, keep both */
+ *wptr++ = '\xc2';
+ *wptr++ = v;
+ }
+ } else {
+ /* write consumed C2 since it's at the buffer end with no pair */
+ *wptr++ = '\xc2';
+ }
+ default:
+ /* skip the consumed character */
+ break;
+ }
+ /* write one by one until the next bad character */
+ while (rptr < end && lookup[(unsigned char) *rptr] == 0) {
+ *wptr++ = *rptr++;
+ }
+ }
+ tt_write (data, wptr - data);
+ } else {
+ for (unsigned int i = 0; i < len; i++)
+ if (data[i] == C0_LF)
+ data[i] = C0_CR;
+
+ tt_write (data, len);
+ }

if (priv_modes & PrivMode_BracketPaste)
tt_printf ("\x1b[201~");
diff --git a/src/xdefaults.C b/src/xdefaults.C
index 894aa8d..4e973bb 100644
--- a/src/xdefaults.C
+++ b/src/xdefaults.C
@@ -115,6 +115,7 @@ optList[] = {
BOOL (Rs_scrollTtyOutput, NULL, "si", Opt_scrollTtyOutput, Optflag_Reverse, "scroll-on-tty-output inhibit"),
BOOL (Rs_scrollTtyKeypress, "scrollTtyKeypress", "sk", Opt_scrollTtyKeypress, 0, "scroll-on-keypress"),
BOOL (Rs_scrollWithBuffer, "scrollWithBuffer", "sw", Opt_scrollWithBuffer, 0, "scroll-with-buffer"),
+ BOOL (Rs_filterPastedControls, "filterPastedControls", "fc", Opt_filterPastedControls, 0, "filter control characters on paste"),
#if BG_IMAGE_FROM_ROOT
BOOL (Rs_transparent, "inheritPixmap", "ip", Opt_transparent, 0, "inherit parent pixmap"),
BOOL (Rs_transparent, "transparent", "tr", Opt_transparent, 0, "inherit parent pixmap"),
--
2.17.0
Emanuele Giaquinta
2018-04-19 08:50:25 UTC
Permalink
Post by Alexander Sergeyev
---
src/optinc.h | 1 +
src/rsinc.h | 1 +
src/screen.C | 63 ++++++++++++++++++++++++++++++++++++++++++++-----
src/xdefaults.C | 1 +
4 files changed, 60 insertions(+), 6 deletions(-)
You seem to assume that the 'data' argument to 'tt_paste' is in utf-8
encoding, while it is actually in locale encoding (see the
rxvt_selection object in rxvttoolkit.C).

Emanuele
Alexander Sergeyev
2018-04-19 18:31:07 UTC
Permalink
Post by Emanuele Giaquinta
You seem to assume that the 'data' argument to 'tt_paste' is in utf-8
encoding, while it is actually in locale encoding (see the
rxvt_selection object in rxvttoolkit.C).
Yes, that needs to be fixed. I've looked through the code. I would have
preferred to move the filtering up so that it works on the Unicode
representation (and avoids byte-sequence matching), but the strings come from
Xlib and are in the locale encoding.

So, there are two general ways to deal with this: 1) go through an
intermediate Unicode representation, or 2) pre-build the byte sequences for the
user's locale and apply them as-is afterwards. The second option seems less
wasteful, but I'm not sure that a specific byte sequence will always match
exactly one whole code point and never straddle the boundary between adjacent
code points (that can't happen in UTF-8 and similar encodings, but there are
lots of other encodings). Anyway, I will look into this fairly soon.

Loading...