[prev in list] [next in list] [prev in thread] [next in thread] 

List:       bash-bug
Subject:    Re: heap-buffer-overflow in history_expand
From:       Grisha Levit <grishalevit () gmail ! com>
Date:       2023-04-30 9:03:43
Message-ID: CAMu=Brrv5qKY6LPfw8PxqNXNO8rNsZo0Fb=BcFb-uHObWPqnrw () mail ! gmail ! com
[Download RAW message or body]

On Sat, Apr 29, 2023, 14:02 Chet Ramey <chet.ramey@case.edu> wrote:

> On 4/28/23 9:28 PM, Grisha Levit wrote:
> > Piping input that simply ends in an leading byte doesn't trigger the
> issue
> > -- that byte byte don't seem to make it into the input line.
> >
> > This is a bit off topic, but I don't really understand what happens wit=
h
> > invalid input sequences in the input, see e.g.:
>
> They should be treated as individual bytes.
>

I think I see what's happening now. Readline accumulates the bytes until a
complete character is read. However, this buffer is not flushed when the
reading of a multibyte character is interrupted by inserting a single byte
character, or by any non-insertion command.

So for example, the \317 byte never gets a chance to be inserted here:
bash --norc -in <<<$':\317:'
$ ::

And inserting the byte is deferred until the next byte with the 8th bit set
is read (which can be at some arbitrary future time) here:
bash --norc -in <<<$':\317\n: \200'
$ :
$ : =CF=80

You can also reproduce interactively by binding the above input to a macro.

Attached is a patch that I think should address this.

["0001-fix-invalid-mb-insert.patch" (text/x-diff)]

From d7d1043e2bc2ad35ac2ef6a2cc45467085cf5835 Mon Sep 17 00:00:00 2001
From: Grisha Levit <grishalevit@gmail.com>
Date: Fri, 28 Apr 2023 23:24:34 -0400
Subject: [PATCH] fix invalid mb insert

lib/readline/text.c
	- _rl_insert_char: return 1 when there are any pending bytes
	- _rl_insert_char: treat being called with a count of 0 as a
	  signal to insert any pending bytes read so far.
	- rl_insert, rl_quoted_insert: if the final _rl_insert_char
	  returns 1, call _rl_insert_char with a count of 0 to insert
	  any pending bytes.
---
 lib/readline/text.c | 68 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/lib/readline/text.c b/lib/readline/text.c
index 356cac5f..27d9292b 100644
--- a/lib/readline/text.c
+++ b/lib/readline/text.c
@@ -704,7 +704,8 @@ static mbstate_t ps = {0};
 
 /* Insert the character C at the current location, moving point forward.
    If C introduces a multibyte sequence, we read the whole sequence and
-   then insert the multibyte char into the line buffer. */
+   then insert the multibyte char into the line buffer.
+   If COUNT is 0, just insert any accumulated multibyte sequence as is. */
 int
 _rl_insert_char (int count, int c)
 {
@@ -718,11 +719,32 @@ _rl_insert_char (int count, int c)
   static int stored_count = 0;
 #endif
 
+#if !defined (HANDLE_MULTIBYTE)
   if (count <= 0)
     return 0;
+#else
+  if (count < 0)
+    return 0;
+  if (count == 0)
+    {
+      /* If COUNT is 0, we were called because we are no longer reading
+         a multibyte character. If we were waiting to complete a character,
+        just insert any bytes we have read so far. */
+      if (pending_bytes_length == 0)
+	return 0;
 
-#if defined (HANDLE_MULTIBYTE)
-  if (MB_CUR_MAX == 1 || rl_byte_oriented)
+      if (stored_count <= 0)
+	stored_count = count;
+      else
+	count = stored_count;
+
+      memcpy (incoming, pending_bytes, pending_bytes_length);
+      incoming[pending_bytes_length] = '\0';
+      incoming_length = pending_bytes_length;
+      pending_bytes_length = 0;
+      memset (&ps, 0, sizeof (mbstate_t));
+    }
+  else if (MB_CUR_MAX == 1 || rl_byte_oriented)
     {
       incoming[0] = c;
       incoming[1] = '\0';
@@ -730,6 +752,9 @@ _rl_insert_char (int count, int c)
     }
   else if (_rl_utf8locale && (c & 0x80) == 0)
     {
+      if (pending_bytes_length)
+        _rl_insert_char (0, 0);
+
       incoming[0] = c;
       incoming[1] = '\0';
       incoming_length = 1;
@@ -827,7 +852,11 @@ _rl_insert_char (int count, int c)
       rl_insert_text (string);
       xfree (string);
 
+#if defined (HANDLE_MULTIBYTE)
+      return pending_bytes_length ? 1 : 0;
+#else
       return 0;
+#endif
     }
 
   if (count > TEXT_COUNT_MAX)
@@ -860,6 +889,8 @@ _rl_insert_char (int count, int c)
       xfree (string);
       incoming_length = 0;
       stored_count = 0;
+
+      return pending_bytes_length ? 1 : 0;
 #else /* !HANDLE_MULTIBYTE */
       char str[TEXT_COUNT_MAX+1];
 
@@ -873,9 +904,9 @@ _rl_insert_char (int count, int c)
 	  rl_insert_text (str);
 	  count -= decreaser;
 	}
-#endif /* !HANDLE_MULTIBYTE */
 
       return 0;
+#endif /* !HANDLE_MULTIBYTE */
     }
 
   if (MB_CUR_MAX == 1 || rl_byte_oriented)
@@ -903,9 +934,12 @@ _rl_insert_char (int count, int c)
       rl_insert_text (incoming);
       stored_count = 0;
     }
-#endif
 
+  return pending_bytes_length ? 1 : 0;
+#else /* !HANDLE_MULTIBYTE */
   return 0;
+#endif /* !HANDLE_MULTIBYTE */
+
 }
 
 /* Overwrite the character at point (or next COUNT characters) with C.
@@ -955,7 +989,7 @@ rl_insert (int count, int c)
   r = (rl_insert_mode == RL_IM_INSERT) ? _rl_insert_char (count, c) : _rl_overwrite_char (count, c);
 
   /* XXX -- attempt to batch-insert pending input that maps to self-insert */
-  x = 0;
+  r = x = 0;
   n = (unsigned short)-2;
   while (_rl_optimize_typeahead &&
 	 rl_num_chars_to_read == 0 &&
@@ -983,6 +1017,10 @@ rl_insert (int count, int c)
 	break;
     }
 
+  /* We are done with self-insert, make sure any pending bytes get inserted */
+  if (r == 1 && rl_insert_mode == RL_IM_INSERT)
+    r = _rl_insert_char (0, 0);
+
   if (n != (unsigned short)-2)		/* -2 = sentinel value for having inserted N */
     {
       /* setting rl_pending_input inhibits setting rl_last_func so we do it
@@ -1054,6 +1092,8 @@ _rl_insert_next_callback (_rl_callback_generic_arg *data)
 int
 rl_quoted_insert (int count, int key)
 {
+  int r;
+
   /* Let's see...should the callback interface futz with signal handling? */
 #if defined (HANDLE_SIGNALS)
   if (RL_ISSTATE (RL_STATE_CALLBACK) == 0)
@@ -1071,16 +1111,16 @@ rl_quoted_insert (int count, int key)
 
   /* A negative count means to quote the next -COUNT characters. */
   if (count < 0)
-    {
-      int r;
+    do
+      r = _rl_insert_next (1);
+    while (r == 0 && ++count < 0);
+  else
+    r = _rl_insert_next (count);
 
-      do
-	r = _rl_insert_next (1);
-      while (r == 0 && ++count < 0);
-      return r;
-    }
+  if (r == 1)
+    _rl_insert_char (0, 0);
 
-  return _rl_insert_next (count);
+  return r;
 }
 
 /* Insert a tab character. */
-- 
2.40.1



[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic