Here is the console output from a successful rootless run on Ubuntu 18. So why is it not working on CentOS 7.x?
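To narrow down the CentOS 7.x failure, it may help to capture the same state on both machines and diff it. A rough sketch (the "centos7" filename is hypothetical; use the CentOS host's actual hostname):

  # On each host, as the rootless user:
  podman info > podman-info-$(hostname).yaml
  cp ~/.config/containers/libpod.conf libpod-$(hostname).conf
  # Then, with both sets of files on one host:
  diff podman-info-f12n13.yaml podman-info-centos7.yaml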
=====
Script started on 2019-10-14 11:55:21-0400
eae@f12n13:~$ podman info
host:
  BuildahVersion: 1.9.0
  Conmon:
    package: 'conmon: /usr/libexec/podman/conmon'
    path: /usr/libexec/podman/conmon
    version: 'conmon version 1.0.0-rc2, commit: unknown'
  Distribution:
    distribution: ubuntu
    version: "18.04"
  MemFree: 284860088320
  MemTotal: 548435001344
  OCIRuntime:
    package: 'cri-o-runc: /usr/lib/cri-o-runc/sbin/runc'
    path: /usr/lib/cri-o-runc/sbin/runc
    version: 'runc version spec: 1.0.1-dev'
  SwapFree: 23127326720
  SwapTotal: 23152492544
  arch: ppc64le
  cpus: 192
  hostname: f12n13
  kernel: 4.15.0-55-generic
  os: linux
  rootless: true
  uptime: 979h 10m 16.18s (Approximately 40.79 days)
registries:
  blocked: null
  insecure: null
  search:
  - docker.io
store:
  ConfigFile: /home/eae/.config/containers/storage.conf
  ContainerStore:
    number: 0
  GraphDriverName: vfs
  GraphOptions: null
  GraphRoot: /tmp/eae/containers/storage
  GraphStatus: {}
  ImageStore:
    number: 3
  RunRoot: /run/user/2018
  VolumePath: /tmp/eae/containers/storage/volumes
eae@f12n13:~$ cat ./.config/containers/libpod.conf
volume_path = "/tmp/eae/containers/storage/volumes"
image_default_transport = "docker://"
runtime = "runc"
conmon_path = ["/usr/libexec/podman/conmon", "/usr/local/lib/podman/conmon", "/usr/bin/conmon", "/usr/sbin/conmon", "/usr/local/bin/conmon", "/usr/local/sbin/conmon"]
conmon_env_vars = ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"]
cgroup_manager = "cgroupfs"
init_path = "/usr/libexec/podman/catatonit"
static_dir = "/tmp/eae/containers/storage/libpod"
tmp_dir = "/run/user/2018/libpod/tmp"
max_log_size = -1
no_pivot_root = false
cni_config_dir = "/etc/cni/net.d/"
cni_plugin_dir = ["/usr/libexec/cni", "/usr/lib/cni", "/usr/local/lib/cni", "/opt/cni/bin"]
infra_image = "k8s.gcr.io/pause:3.1"
infra_command = "/pause"
enable_port_reservation = true
label = true
network_cmd_path = ""
num_locks = 2048
events_logger = "journald"
EventsLogFilePath = ""
detach_keys = "ctrl-p,ctrl-q"
hooks_dir = ["/etc/containers/oci/hooks.d"]

[runtimes]
runc = ["/usr/lib/cri-o-runc/sbin/runc"]
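Note in passing that the [runtimes] entry above points at Ubuntu's cri-o-runc package path, which will not exist on a CentOS 7 host; if this file was copied over, podman may be looking for a runc binary that isn't there. A quick sanity check on the CentOS side (a sketch, not from the transcript):

  # Does the configured runtime binary actually exist on this host?
  grep -A1 '^\[runtimes\]' ~/.config/containers/libpod.conf
  ls -l /usr/lib/cri-o-runc/sbin/runc 2>/dev/null || command -v runc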
eae@f12n13:~$ cat /etc/containers/oci/hooks.d/oci-nvidia-hook.json
{
  "version": "1.0.0",
  "hook": {
    "path": "/usr/bin/nvidia-container-runtime-hook",
    "args": ["nvidia-container-runtime-hook", "prestart"]
  },
  "when": {
    "always": true
  },
  "stages": ["prestart"]
}
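The debug run below confirms podman picks this hook up ("added hook ... oci-nvidia-hook.json"). On CentOS 7 the packaged default hooks directory may differ (/usr/share/containers/oci/hooks.d is common on RHEL-family systems), so one quick check there is to point podman at the directory explicitly and watch for the same messages (a sketch; adjust the image to whatever you run on CentOS):

  podman --hooks-dir /etc/containers/oci/hooks.d --log-level=debug run --rm \
      ibmcom/tensorflow-ppc64le:1.14.0-gpu nvidia-smi 2>&1 | grep -i hook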
eae@f12n13:~$ podman --log-level=debug run --rm ibmcom/tensorflow-ppc64le:1.14.0-gpu nvidia-smi
INFO[0000] running as rootless
DEBU[0000] Initializing boltdb state at /tmp/eae/containers/storage/libpod/bolt_state.db
DEBU[0000] Using graph driver vfs
DEBU[0000] Using graph root /tmp/eae/containers/storage
DEBU[0000] Using run root /run/user/2018
DEBU[0000] Using static dir /tmp/eae/containers/storage/libpod
DEBU[0000] Using tmp dir /run/user/2018/libpod/tmp
DEBU[0000] Using volume path /tmp/eae/containers/storage/volumes
DEBU[0000] Set libpod namespace to ""
DEBU[0000] [graphdriver] trying provided driver "vfs"
DEBU[0000] Initializing event backend journald
DEBU[0000] parsed reference into "[vfs@/tmp/eae/containers/storage+/run/user/2018]docker.io/ibmcom/tensorflow-ppc64le:1.14.0-gpu"
DEBU[0000] parsed reference into "[vfs@/tmp/eae/containers/storage+/run/user/2018]@09f6383a1fbb35ca5fbccf6f0bd21a5c3d5264b6c0b56597fd47a97c99421727"
DEBU[0000] exporting opaque data as blob "sha256:09f6383a1fbb35ca5fbccf6f0bd21a5c3d5264b6c0b56597fd47a97c99421727"
DEBU[0000] parsed reference into "[vfs@/tmp/eae/containers/storage+/run/user/2018]@09f6383a1fbb35ca5fbccf6f0bd21a5c3d5264b6c0b56597fd47a97c99421727"
DEBU[0000] exporting opaque data as blob "sha256:09f6383a1fbb35ca5fbccf6f0bd21a5c3d5264b6c0b56597fd47a97c99421727"
DEBU[0000] parsed reference into "[vfs@/tmp/eae/containers/storage+/run/user/2018]@09f6383a1fbb35ca5fbccf6f0bd21a5c3d5264b6c0b56597fd47a97c99421727"
DEBU[0000] Got mounts: []
DEBU[0000] Got volumes: []
DEBU[0000] Using slirp4netns netmode
DEBU[0000] created OCI spec and options for new container
DEBU[0000] Allocated lock 0 for container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6
DEBU[0000] parsed reference into "[vfs@/tmp/eae/containers/storage+/run/user/2018]@09f6383a1fbb35ca5fbccf6f0bd21a5c3d5264b6c0b56597fd47a97c99421727"
DEBU[0000] exporting opaque data as blob "sha256:09f6383a1fbb35ca5fbccf6f0bd21a5c3d5264b6c0b56597fd47a97c99421727"
DEBU[0002] created container "427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6"
DEBU[0002] container "427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6" has work directory "/tmp/eae/containers/storage/vfs-containers/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/userdata"
DEBU[0002] container "427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6" has run directory "/run/user/2018/vfs-containers/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/userdata"
DEBU[0002] New container created "427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6"
DEBU[0002] container "427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6" has CgroupParent "/libpod_parent/libpod-427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6"
DEBU[0002] Not attaching to stdin
DEBU[0002] mounted container "427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6" at "/tmp/eae/containers/storage/vfs/dir/6493d899b4a3aa9facb67ec96a1ba381c135c5003b384ac6da1af433d816714f"
DEBU[0002] Created root filesystem for container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 at /tmp/eae/containers/storage/vfs/dir/6493d899b4a3aa9facb67ec96a1ba381c135c5003b384ac6da1af433d816714f
DEBU[0002] /etc/system-fips does not exist on host, not mounting FIPS mode secret
DEBU[0002] reading hooks from /etc/containers/oci/hooks.d
DEBU[0002] added hook /etc/containers/oci/hooks.d/oci-nvidia-hook.json
DEBU[0002] hook oci-nvidia-hook.json matched; adding to stages [prestart]
DEBU[0002] Created OCI spec for container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 at /tmp/eae/containers/storage/vfs-containers/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/userdata/config.json
DEBU[0002] /usr/libexec/podman/conmon messages will be logged to syslog
DEBU[0002] running conmon: /usr/libexec/podman/conmon args="[-c 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 -u 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 -n dazzling_cartwright -r /usr/lib/cri-o-runc/sbin/runc -b /tmp/eae/containers/storage/vfs-containers/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/userdata -p /run/user/2018/vfs-containers/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/userdata/pidfile --exit-dir /run/user/2018/libpod/tmp/exits --conmon-pidfile /run/user/2018/vfs-containers/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/userdata/conmon.pid --exit-command /usr/bin/podman --exit-command-arg --root --exit-command-arg /tmp/eae/containers/storage --exit-command-arg --runroot --exit-command-arg /run/user/2018 --exit-command-arg --log-level --exit-command-arg debug --exit-command-arg --cgroup-manager --exit-command-arg cgroupfs --exit-command-arg --tmpdir --exit-command-arg /run/user/2018/libpod/tmp --exit-command-arg --runtime --exit-command-arg runc --exit-command-arg --storage-driver --exit-command-arg vfs --exit-command-arg container --exit-command-arg cleanup --exit-command-arg --rm --exit-command-arg 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 --socket-dir-path /run/user/2018/libpod/tmp/socket -l k8s-file:/tmp/eae/containers/storage/vfs-containers/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/userdata/ctr.log --log-level debug --syslog]"
WARN[0002] Failed to add conmon to cgroupfs sandbox cgroup: error creating cgroup for cpuset: mkdir /sys/fs/cgroup/cpuset/libpod_parent: permission denied
[conmon:d]: failed to write to /proc/self/oom_score_adj: Permission denied
DEBU[0003] Received container pid: 181118
DEBU[0003] Created container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 in OCI runtime
DEBU[0003] Attaching to container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6
DEBU[0003] connecting to socket /run/user/2018/libpod/tmp/socket/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6/attach
DEBU[0003] Starting container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 with command [nvidia-smi]
DEBU[0003] Started container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6
DEBU[0003] Enabling signal proxying
Mon Oct 14 17:18:44 2019
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.104      Driver Version: 410.104      CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla K80           On   | 00000002:03:00.0 Off |                    0 |
| N/A   32C    P8    26W / 149W |      0MiB / 11441MiB |      2%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           On   | 00000002:04:00.0 Off |                    0 |
| N/A   29C    P8    31W / 149W |      0MiB / 11441MiB |      2%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
DEBU[0004] Checking container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 status...
DEBU[0004] Attempting to read container 427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6 exit code from file /run/user/2018/libpod/tmp/exits/427b4716bca98d8f83fbe6b18095840f75faf8217d5b3e685505ec19c92c21d6-old
DEBU[0004] [graphdriver] trying provided driver "vfs"
eae@f12n13:~$ exit
Script done on 2019-10-14 13:18:47-0400
On Tue, Oct 15, 2019 at 9:54 AM Bryan Hepworth <bryan.hepworth(a)gmail.com>
wrote:
Hi All
I'll have to follow this thread, as GPUs are very much on my radar too.
Bryan
On Tue, Oct 15, 2019 at 2:34 PM Giuseppe Scrivano <gscrivan(a)redhat.com>
wrote:
> Hi Scott,
>
> Scott McCarty <smccarty(a)redhat.com> writes:
>
> > Giuseppe,
> > Is that something that will potentially be fixed with cgroups v2?
> > My gut says it would be:
> >
> > 1. Get the world to cgroup v2
> > 2. Nvidia might have to redesign some things?
> >
> > Until then, it's not possible right?
>
> No, cgroups v2 won't solve access to the devices cgroup for rootless.
> Configuring the devices cgroup on cgroup v2 requires using eBPF, which
> is a privileged operation.
>
> Giuseppe
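To make Giuseppe's point concrete: on cgroup v2 the device controller has no interface files at all; device access is enforced by attaching a BPF_CGROUP_DEVICE program to the cgroup, and that attach is a privileged bpf() operation. A rough illustration of the failure a plain user would hit (the pinned program path is hypothetical):

  # Attaching a device-filter program to a cgroup requires privileges;
  # as an unprivileged user this is expected to fail with "Operation not permitted".
  bpftool cgroup attach /sys/fs/cgroup/user.slice device pinned /sys/fs/bpf/dev_filter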