[prev in list] [next in list] [prev in thread] [next in thread] 

List:       lxc-devel
Subject:    [lxc-devel] [lxd/master] Add support for NVIDIA runtime passthrough
From:       stgraber on Github <lxc-bot () linuxcontainers ! org>
Date:       2018-03-29 18:03:33
Message-ID: 20180329180333.5FCE6520BE () mailman01 ! srv ! dcmtl ! stgraber ! net
[Download RAW message or body]

[Attachment #2 (text/x-mailbox)]

The following pull request was submitted through Github.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/4373

This e-mail was sent by the LXC bot; direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
Signed-off-by: Stéphane Graber <stgraber@ubuntu.com>

[Attachment #3 (text/plain)]

From 65cec7f2c884ea2490fe9376d10abe8aefa97b84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stéphane Graber?= <stgraber@ubuntu.com>
Date: Thu, 29 Mar 2018 14:03:02 -0400
Subject: [PATCH] Add support for NVIDIA runtime passthrough
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber@ubuntu.com>
---
 doc/api-extensions.md   |  5 +++++
 doc/containers.md       |  2 ++
 lxd/container_lxc.go    | 33 +++++++++++++++++++++++++++++++++
 scripts/bash/lxd-client |  2 +-
 shared/container.go     |  2 ++
 shared/version/api.go   |  1 +
 6 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 7ece1b0f4..e5cd6e2ac 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -449,3 +449,8 @@ This adds a new `lifecycle` message type to the events API.

 ## storage\_api\_remote\_volume\_handling
 This adds the ability to copy and move custom storage volumes between remote.
+
+## nvidia\_runtime
+Adds a `nvidia.runtime` config option for containers; setting this to
+true will have the NVIDIA runtime and CUDA libraries passed to the
+container.
diff --git a/doc/containers.md b/doc/containers.md
index 42152f964..46d2af9fd 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -15,6 +15,7 @@ currently supported:
  - `environment` (environment variables)
  - `image` (copy of the image properties at time of creation)
  - `limits` (resource limits)
+ - `nvidia` (NVIDIA and CUDA configuration)
  - `raw` (raw container configuration overrides)
  - `security` (security policies)
  - `user` (storage for user properties, searchable)
@@ -45,6 +46,7 @@ linux.kernel\_modules                   | string    | -             | yes
 migration.incremental.memory            | boolean   | false         | yes           | migration\_pre\_copy                 | Incremental memory transfer of the container's memory to reduce downtime.
 migration.incremental.memory.goal       | integer   | 70            | yes           | migration\_pre\_copy                 | Percentage of memory to have in sync before stopping the container.
 migration.incremental.memory.iterations | integer   | 10            | yes           | migration\_pre\_copy                 | Maximum number of transfer operations to go through before stopping the container.
+nvidia.runtime                          | boolean   | false         | no            | nvidia\_runtime                      | Pass the host NVIDIA and CUDA runtime libraries into the container
 raw.apparmor                            | blob      | -             | yes           | -                                    | Apparmor profile entries to be appended to the generated profile
 raw.idmap                               | blob      | -             | no            | id\_map                              | Raw idmap configuration (e.g. "both 1000 1000")
 raw.lxc                                 | blob      | -             | no            | -                                    | Raw LXC configuration to be appended to the generated one
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 8195b9da9..a0ea90b2b 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1165,6 +1165,39 @@ func (c *containerLXC) initLXC(config bool) error {
 		}
 	}

+	// Setup NVIDIA runtime
+	if shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+		hookDir := os.Getenv("LXD_LXC_HOOK")
+		if hookDir == "" {
+			hookDir = "/usr/share/lxc/hooks"
+		}
+
+		hookPath := filepath.Join(hookDir, "nvidia")
+		if !shared.PathExists(hookPath) {
+			return fmt.Errorf("The NVIDIA LXC hook couldn't be found")
+		}
+
+		_, err := exec.LookPath("nvidia-container-cli")
+		if err != nil {
+			return fmt.Errorf("The NVIDIA container tools couldn't be found")
+		}
+
+		err = lxcSetConfigItem(cc, "lxc.environment", "NVIDIA_VISIBLE_DEVICES=none")
+		if err != nil {
+			return err
+		}
+
+		err = lxcSetConfigItem(cc, "lxc.environment", "NVIDIA_DRIVER_CAPABILITIES=compute,utility")
+		if err != nil {
+			return err
+		}
+
+		err = lxcSetConfigItem(cc, "lxc.hook.mount", hookPath)
+		if err != nil {
+			return err
+		}
+	}
+
 	// Memory limits
 	if c.state.OS.CGroupMemoryController {
 		memory := c.expandedConfig["limits.memory"]
diff --git a/scripts/bash/lxd-client b/scripts/bash/lxd-client
index 671c1b773..bc4d4a8cb 100644
--- a/scripts/bash/lxd-client
+++ b/scripts/bash/lxd-client
@@ -80,7 +80,7 @@ _have lxc && {
       limits.disk.priority limits.memory limits.memory.enforce \
       limits.memory.swap limits.memory.swap.priority limits.network.priority \
       limits.processes linux.kernel_modules migration.incremental.memory \
-      migration.incremental.memory.goal \
+      migration.incremental.memory.goal nvidia.runtime \
       migration.incremental.memory.iterations raw.apparmor raw.idmap raw.lxc \
       raw.seccomp security.idmap.base security.idmap.isolated \
       security.idmap.size security.devlxd security.nesting security.privileged \
diff --git a/shared/container.go b/shared/container.go
index 3836b80f3..b6cfc7ada 100644
--- a/shared/container.go
+++ b/shared/container.go
@@ -206,6 +206,8 @@ var KnownContainerConfigKeys = map[string]func(value string) error{
 	"migration.incremental.memory.iterations": IsUint32,
 	"migration.incremental.memory.goal":       IsUint32,

+	"nvidia.runtime": IsBool,
+
 	"security.nesting":    IsBool,
 	"security.privileged": IsBool,
 	"security.devlxd":     IsBool,
diff --git a/shared/version/api.go b/shared/version/api.go
index f6e718a42..bec41352f 100644
--- a/shared/version/api.go
+++ b/shared/version/api.go
@@ -101,6 +101,7 @@ var APIExtensions = []string{
 	"clustering",
 	"event_lifecycle",
 	"storage_api_remote_volume_handling",
+	"nvidia_runtime",
 }

 // APIExtensionsCount returns the number of available API extensions.


[Attachment #4 (text/plain)]

_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic