'[PATCH v7 3/7] unshare: new applet'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       busybox
Subject:    [PATCH v7 3/7] unshare: new applet
From:       Bartosz Golaszewski <bartekgola () gmail ! com>
Date:       2016-03-18 11:37:46
Message-ID: 1458301070-25452-4-git-send-email-bartekgola () gmail ! com
[Download RAW message or body]

Add a fully featured unshare implementation implementing all arguments
supported in the upstream version.

Signed-off-by: Bartosz Golaszewski <bartekgola@gmail.com>
---
 util-linux/unshare.c | 465 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 465 insertions(+)
 create mode 100644 util-linux/unshare.c

diff --git a/util-linux/unshare.c b/util-linux/unshare.c
new file mode 100644
index 0000000..742d336
--- /dev/null
+++ b/util-linux/unshare.c
@@ -0,0 +1,465 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Mini unshare implementation for busybox.
+ *
+ * Copyright (C) 2016 by Bartosz Golaszewski <bartekgola@gmail.com>
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+
+//config:config UNSHARE
+//config:	bool "unshare"
+//config:	default y
+//config:	select PLATFORM_LINUX
+//config:	help
+//config:	  Run program with some namespaces unshared from parent.
+//config:
+//config:config FEATURE_UNSHARE_LONG_OPTS
+//config:	bool "enable long options"
+//config:	default y
+//config:	depends on UNSHARE && LONG_OPTS
+//config:	help
+//config:	  Support long options for the unshare applet. This makes
+//config:	  the busybox implementation more compatible with upstream.
+
+//applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_UNSHARE) += unshare.o
+
+//usage:#define unshare_trivial_usage
+//usage:       "[options] <program> [args...]"
+//usage:#if ENABLE_FEATURE_UNSHARE_LONG_OPTS
+//usage:#define unshare_full_usage "\n\n"
+//usage:       "Options:"
+//usage:     "\n	-m, --mount[=<file>]		unshare mounts namespace"
+//usage:     "\n	-u, --uts[=<file>]		unshare UTS namespace (hostname etc.)"
+//usage:     "\n	-i, --ipc[=<file>]		unshare System V IPC namespace"
+//usage:     "\n	-n, --network[=<file>]		unshare network namespace"
+//usage:     "\n	-p, --pid[=<file>]		unshare pid namespace"
+//usage:     "\n	-U, --user[=<file>]		unshare user namespace"
+//usage:     "\n	-f, --fork			fork before launching <program>"
+//usage:     "\n	-M, --mount-proc[=<dir>]	mount proc filesystem first (implies --mount)"
+//usage:     "\n	-r, --map-root-user		map current user to root (implies --user)"
+//usage:     "\n	-P, --propagation slave|shared|private|unchanged"
+//usage:     "\n					modify mount propagation in mount namespace"
+//usage:     "\n	-s, --setgroups allow|deny	control the setgroups syscall in user namespaces"
+//usage:#else
+//usage:#define unshare_full_usage "\n\n"
+//usage:       "Options:"
+//usage:     "\n	-m [<file>]	unshare mounts namespace"
+//usage:     "\n	-u [<file>]	unshare UTS namespace (hostname etc.)"
+//usage:     "\n	-i [<file>]	unshare System V IPC namespace"
+//usage:     "\n	-n [<file>]	unshare network namespace"
+//usage:     "\n	-p [<file>]	unshare pid namespace"
+//usage:     "\n	-U [<file>]	unshare user namespace"
+//usage:     "\n	-f		fork before launching <program>"
+//usage:     "\n	-M [<dir>]	mount proc filesystem first (implies -m)"
+//usage:     "\n	-r		map current user to root (implies -U)"
+//usage:     "\n	-P slave|shared|private|unchanged"
+//usage:     "\n			modify mount propagation in mount namespace"
+//usage:     "\n	-s allow|deny	control the setgroups syscall in user namespaces"
+//usage:#endif
+
+#include "libbb.h"
+
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+
+/*
+ * Longest possible path to a procfs file used in unshare. Must be able to
+ * contain the '/proc/' string, the '/ns/user' string which is the longest
+ * namespace name and a 32-bit integer representing the process ID.
+ */
+#define PROC_PATH_MAX	(sizeof("/proc//ns/user") + INT_BUF_MAX(pid_t))
+
+#define PATH_PROC_SETGROUPS	"/proc/self/setgroups"
+#define PATH_PROC_UIDMAP	"/proc/self/uid_map"
+#define PATH_PROC_GIDMAP	"/proc/self/gid_map"
+
+enum {	/* bits tested in getopt32()'s result; same order as opt_str */
+	OPT_mount	= BIT( 0),	/* -m */
+	OPT_uts		= BIT( 1),	/* -u */
+	OPT_ipc		= BIT( 2),	/* -i */
+	OPT_network	= BIT( 3),	/* -n */
+	OPT_pid		= BIT( 4),	/* -p */
+	OPT_user	= BIT( 5),	/* -U */
+	OPT_fork	= BIT( 6),	/* -f */
+	OPT_mount_proc	= BIT( 7),	/* -M */
+	OPT_map_root	= BIT( 8),	/* -r */
+	OPT_propagation	= BIT( 9),	/* -P */
+	OPT_setgroups	= BIT(10),	/* -s */
+};
+
+enum {	/* indices shared by ns_list[] and unshare_main()'s ns_ctx_list[] */
+	NS_MNT_POS = 0,
+	NS_UTS_POS,
+	NS_IPC_POS,
+	NS_NET_POS,
+	NS_PID_POS,
+	NS_USR_POS,
+	NS_COUNT,	/* number of entries above; used as array size */
+};
+
+struct namespace_descr {	/* static description of one namespace kind */
+	const int opt;		/* OPT_* bit that requests this namespace */
+	const int flag;		/* CLONE_NEW* flag OR-ed into the unshare() argument */
+	const char *nsfile;	/* file name used under /proc/<pid>/ns/ */
+};
+
+struct namespace_ctx {	/* per-invocation state of one namespace kind */
+	char *path;	/* bind-mount target filled in by getopt32(), NULL if none */
+};
+
+struct propagation_mode {	/* one accepted --propagation value */
+	const char *name;	/* string matched against the option argument */
+	unsigned long flags;	/* value returned by parse_propagation() */
+};
+
+/*
+ * Upstream unshare doesn't support short options for --mount-proc and
+ * --propagation, but let's add them here to let the user use them even with
+ * long options disabled in busybox config.
+ */
+static const char opt_str[] = "+m::u::i::n::p::U::fM::rP:s:"; /* "::" optional arg, ":" required arg */
+
+/*
+ * Upstream unshare only accepts optional arguments (namespace mountpoints)
+ * for long options. We support them for both short (for size reduction
+ * with LONG_OPTS disabled) and long opts (for upstream compatibility).
+ */
+#if ENABLE_FEATURE_UNSHARE_LONG_OPTS
+static const char unshare_longopts[] ALIGN1 =	/* long name, argument kind, short equivalent */
+	"mount\0"		Optional_argument	"m"
+	"uts\0"			Optional_argument	"u"
+	"ipc\0"			Optional_argument	"i"
+	"network\0"		Optional_argument	"n"
+	"pid\0"			Optional_argument	"p"
+	"user\0"		Optional_argument	"U"
+	"fork\0"		No_argument		"f"
+	"mount-proc\0"		Optional_argument	"M"
+	"map-root-user\0"	No_argument		"r"
+	"propagation\0"		Required_argument	"P"
+	"setgroups\0"		Required_argument	"s";
+#endif /* ENABLE_FEATURE_UNSHARE_LONG_OPTS */
+
+static const struct namespace_descr ns_list[] = {	/* indexed by NS_*_POS */
+	[NS_MNT_POS] = {	/* mount namespace */
+		.opt = OPT_mount,
+		.flag = CLONE_NEWNS,
+		.nsfile = "mnt",
+	},
+	[NS_UTS_POS] = {	/* UTS namespace */
+		.opt = OPT_uts,
+		.flag = CLONE_NEWUTS,
+		.nsfile = "uts",
+	},
+	[NS_IPC_POS] = {	/* IPC namespace */
+		.opt = OPT_ipc,
+		.flag = CLONE_NEWIPC,
+		.nsfile = "ipc",
+	},
+	[NS_NET_POS] = {	/* network namespace */
+		.opt = OPT_network,
+		.flag = CLONE_NEWNET,
+		.nsfile = "net",
+	},
+	[NS_PID_POS] = {	/* pid namespace */
+		.opt = OPT_pid,
+		.flag = CLONE_NEWPID,
+		.nsfile = "pid",
+	},
+	[NS_USR_POS] = {	/* user namespace */
+		.opt = OPT_user,
+		.flag = CLONE_NEWUSER,
+		.nsfile = "user",
+	},
+};
+
+/*
+ * Translate the --propagation argument into mount flags.
+ *
+ *   "slave"     -> MS_REC | MS_SLAVE
+ *   "private"   -> MS_REC | MS_PRIVATE
+ *   "shared"    -> MS_REC | MS_SHARED
+ *   "unchanged" -> 0
+ *
+ * Any other prop_str is passed to bb_error_msg_and_die().
+ */
+static unsigned long parse_propagation(const char *prop_str)
+{
+	static const struct propagation_mode known_modes[] = {
+		{ .name = "slave",     .flags = MS_REC | MS_SLAVE   },
+		{ .name = "private",   .flags = MS_REC | MS_PRIVATE },
+		{ .name = "shared",    .flags = MS_REC | MS_SHARED  },
+		{ .name = "unchanged", .flags = 0                   },
+	};
+	const struct propagation_mode *mode = known_modes;
+	int left = ARRAY_SIZE(known_modes);
+
+	/* Linear scan in table order; the first exact name match wins. */
+	for (; left > 0; left--, mode++) {
+		if (strcmp(mode->name, prop_str) != 0)
+			continue;
+		return mode->flags;
+	}
+
+	bb_error_msg_and_die("unsupported propagation mode: %s", prop_str);
+}
+
+/* Inode number of /proc/<pid>/ns/mnt, as filled in by xstat(). */
+static ino_t get_mnt_ns_inode_by_pid(pid_t pid)
+{
+	struct stat ns_stat;
+	char ns_link[PROC_PATH_MAX];
+
+	snprintf(ns_link, sizeof(ns_link), "/proc/%d/ns/mnt", pid);
+	xstat(ns_link, &ns_stat);
+	return ns_stat.st_ino;
+}
+
+/*
+ * Bind-mount /proc/<pid>/ns/<nsfile> onto every path set in ns_ctx_list.
+ * Entries whose path is NULL are skipped; a failing mount() is reported
+ * through bb_perror_msg_and_die().
+ */
+static void mount_namespaces(pid_t pid, struct namespace_ctx *ns_ctx_list)
+{
+	char src[PROC_PATH_MAX];
+	int idx;
+
+	for (idx = 0; idx < NS_COUNT; idx++) {
+		const char *target = ns_ctx_list[idx].path;
+
+		if (target != NULL) {
+			snprintf(src, sizeof(src), "/proc/%d/ns/%s",
+				 pid, ns_list[idx].nsfile);
+			if (mount(src, target, NULL, MS_BIND, NULL) < 0) {
+				bb_perror_msg_and_die("mount %s on %s failed",
+						      src, target);
+			}
+		}
+	}
+}
+
+/*
+ * Applet entry point: parse the options in argv (argc is unused), unshare the
+ * requested namespaces, optionally bind-mount them and/or fork, then exec
+ * <program> or, when none was given, call run_shell() with $SHELL.
+ */
+int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int unshare_main(int argc UNUSED_PARAM, char **argv)
+{
+	int unsflags = 0, i, need_mount = 0, status, setgrp_allow = 0;
+	const char *proc_mnt_target = "/proc", *prop_str, *setgrp_str;
+	unsigned long prop_flags = MS_REC | MS_PRIVATE;
+	struct namespace_ctx ns_ctx_list[NS_COUNT];
+	uid_t reuid = geteuid();
+	gid_t regid = getegid();
+	unsigned int opts;
+	pid_t pid = -1;
+
+	IF_FEATURE_UNSHARE_LONG_OPTS(applet_long_options = unshare_longopts);
+
+	memset(ns_ctx_list, 0, sizeof(struct namespace_ctx) * NS_COUNT);
+
+	opts = getopt32(argv, opt_str,
+			&ns_ctx_list[NS_MNT_POS].path,
+			&ns_ctx_list[NS_UTS_POS].path,
+			&ns_ctx_list[NS_IPC_POS].path,
+			&ns_ctx_list[NS_NET_POS].path,
+			&ns_ctx_list[NS_PID_POS].path,
+			&ns_ctx_list[NS_USR_POS].path,
+			&proc_mnt_target, &prop_str, &setgrp_str);
+	argv += optind;
+
+	/*
+	 * Mounting the proc filesystem before running the program implies
+	 * creating a new mount namespace since the /proc mount would
+	 * otherwise mess up existing programs on the system.
+	 */
+	if (opts & OPT_mount_proc)
+		opts |= OPT_mount;
+
+	/* Mapping user and group IDs to root implies --user. */
+	if (opts & OPT_map_root)
+		opts |= OPT_user;
+
+	if (opts & OPT_setgroups) {
+		if (strcmp(setgrp_str, "allow") == 0) {
+			setgrp_allow = 1;
+		} else if (strcmp(setgrp_str, "deny") == 0) {
+			setgrp_allow = 0;
+		} else {
+			bb_error_msg_and_die("unsupported --setgroups argument '%s'", setgrp_str);
+		}
+	}
+
+	for (i = 0; i < NS_COUNT; i++) {
+		const struct namespace_descr *ns = &ns_list[i];
+		struct namespace_ctx *ns_ctx = &ns_ctx_list[i];
+
+		if (opts & ns->opt)
+			unsflags |= ns->flag;
+
+		if (ns_ctx->path)
+			need_mount = 1;
+	}
+
+	/* Silently ignore --propagation if --mount is not requested. */
+	if ((opts & OPT_propagation) && (opts & OPT_mount))
+		prop_flags = parse_propagation(prop_str);
+
+	/*
+	 * Special case: if we were requested to unshare the mount namespace
+	 * AND to make any namespace persistent (by bind mounting it) we need
+	 * to spawn a child process which will wait for the parent to call
+	 * unshare(), then mount parent's namespaces while still in the
+	 * previous namespace.
+	 */
+	if (need_mount && (opts & OPT_mount)) {
+		ino_t inop, inoc;
+		pid_t ppid;
+
+		/*
+		 * Can't use getppid() in child, as we can be unsharing the
+		 * pid namespace.
+		 */
+		ppid = getpid();
+
+		/*
+		 * Save current process' mount namespace file inode number. We
+		 * will later use it in child process to check if it already
+		 * changed meaning that this process already called unshare().
+		 */
+		inop = get_mnt_ns_inode_by_pid(ppid);
+
+		pid = xfork();
+		if (pid == 0) {
+			/*
+			 * Child - wait until parent calls unshare(). No issue
+			 * in busy-waiting - by the time we get here from
+			 * fork(), the parent has usually already unshared the
+			 * mount namespace. We should spin a few times at most.
+			 *
+			 * XXX Should probably use a pipe to notify the child
+			 * about completing unshare().
+			 */
+			do {
+				inoc = get_mnt_ns_inode_by_pid(ppid);
+			} while (inoc == inop);
+
+			/* Mount parent's unshared namespaces. */
+			mount_namespaces(ppid, ns_ctx_list);
+
+			return EXIT_SUCCESS;
+		} /* Parent continues. */
+	}
+
+	status = unshare(unsflags);
+	if (status < 0)
+		bb_perror_msg_and_die("unshare failed");
+
+	if (need_mount) {
+		/* Wait for the child to finish mounting the namespaces. */
+		if (opts & OPT_mount) {
+			int exit_status;
+
+			status = safe_waitpid(pid, &exit_status, 0);
+			if (status < 0)
+				bb_perror_msg_and_die("waitpid");
+
+			if (WIFEXITED(exit_status) &&
+			    WEXITSTATUS(exit_status) != EXIT_SUCCESS)
+				return WEXITSTATUS(exit_status); /* helper's code, not the pid in status */
+		} else {
+			/*
+			 * Regular way - we were requested to mount some other
+			 * namespaces: mount them after the call to unshare().
+			 */
+			mount_namespaces(getpid(), ns_ctx_list);
+		}
+	}
+
+	/*
+	 * When we're unsharing the pid namespace, it's not the process that
+	 * calls unshare() that is put into the new namespace, but its first
+	 * child. The user may want to use this option to spawn a new process
+	 * that'll become PID 1 in this new namespace.
+	 */
+	if (opts & OPT_fork) {
+		int exit_status;
+
+		pid = xfork();
+		if (pid > 0) {
+			status = safe_waitpid(pid, &exit_status, 0);
+			if (status < 0)
+				bb_perror_msg_and_die("waitpid");
+
+			if (WIFEXITED(exit_status))
+				return WEXITSTATUS(exit_status);
+			else if (WIFSIGNALED(exit_status))
+				kill(getpid(), WTERMSIG(exit_status));
+
+			bb_error_msg_and_die("child exit failed");
+		} /* Child continues. */
+	}
+
+	if (opts & OPT_map_root) {
+		char uidmap_buf[sizeof(unsigned int) * 3 + sizeof(" 0 1")];
+
+		if ((opts & OPT_setgroups) && setgrp_allow) {
+			bb_error_msg_and_die(
+				"options --setgroups=allow and --map-root-user are mutually exclusive");
+		}
+
+		/*
+		 * Since Linux 3.19 unprivileged writing of /proc/self/gid_map
+		 * has been disabled unless /proc/self/setgroups is written
+		 * first to permanently disable the ability to call setgroups
+		 * in that user namespace.
+		 */
+		xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
+		snprintf(uidmap_buf, sizeof(uidmap_buf), "%u 0 1", (unsigned)reuid);
+		xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
+		snprintf(uidmap_buf, sizeof(uidmap_buf), "%u 0 1", (unsigned)regid);
+		xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
+	} else if (opts & OPT_setgroups) {
+		xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
+	}
+
+	/* prop_flags == 0 is the "unchanged" mode: leave propagation alone. */
+	if ((opts & OPT_mount) && prop_flags != 0) {
+		status = mount("none", "/", NULL, prop_flags, NULL);
+		if (status < 0)
+			bb_perror_msg_and_die("cannot change root filesystem propagation");
+	}
+
+	if (opts & OPT_mount_proc) {
+		int flags;
+
+		/*
+		 * When creating a new pid namespace, we might want the pid
+		 * subdirectories in /proc to remain consistent with the new
+		 * process IDs. Without --mount-proc the pids in /proc would
+		 * still reflect the old pid namespace. This is why we make
+		 * /proc private here and then do a fresh mount.
+		 */
+		flags = MS_PRIVATE | MS_REC;
+		status = mount("none", proc_mnt_target, NULL, flags, NULL);
+		if (status == 0) {
+			flags = MS_NOSUID | MS_NOEXEC | MS_NODEV;
+			status = mount("proc", proc_mnt_target, "proc", flags, NULL);
+		}
+
+		if (status < 0)
+			bb_perror_msg_and_die("mount %s failed", proc_mnt_target);
+	}
+
+	if (*argv) {
+		execvp(*argv, argv);
+		bb_perror_msg_and_die("failed to execute %s", *argv);
+	}
+
+	run_shell(getenv("SHELL"), 0, NULL, NULL);
+}
-- 
2.1.4

_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox
[prev in list] [next in list] [prev in thread] [next in thread]