mirror of
https://github.com/gentoo-mirror/gentoo.git
synced 2025-12-21 21:17:37 -08:00
rocm.eclass: add graceful rocm_add_sandbox; allow NPU for the sandbox
1) With upcoming firmware, the AMD NPU will be exposed as /dev/accel/accel0. This device is owned by root:render, similarly to the GPU. When it is present, tools like rocminfo try to query device capabilities, breaking the sandbox. To fix this issue, check_amdgpu now calls addwrite on this device. 2) There are a bunch of bugs from tinderbox and users who forgot to enable KFD in the kernel. Instead of a recommendation to check permissions, they will now see a better message stating that the AMD device is missing. 3) For cases where we just want to addwrite to AMD devices, a new function rocm_add_sandbox (similar to cuda_add_sandbox) was added. No errors are raised if a device is missing. Bug: https://bugs.gentoo.org/965198 Signed-off-by: Sv. Lockal <lockalsash@gmail.com> Part-of: https://github.com/gentoo/gentoo/pull/44355 Signed-off-by: Sam James <sam@gentoo.org>
This commit is contained in:
parent
1ffaa41038
commit
276eb87f41
@ -248,13 +248,44 @@ get_amdgpu_flags() {
|
||||
echo $(printf "%s;" ${AMDGPU_TARGETS[@]})
|
||||
}
|
||||
|
||||
# @FUNCTION: rocm_add_sandbox
# @USAGE: [-w]
# @DESCRIPTION:
# Add AMD GPU/NPU dev nodes to the sandbox predict list.
# With -w, add them to the sandbox write list instead.
# Paths that are not character devices are skipped without raising an error.
rocm_add_sandbox() {
	debug-print-function "${FUNCNAME[0]}" "$@"

	# The mode depends only on the first argument, so pick the sandbox
	# helper once instead of re-testing it for every device node.
	local sandbox_cmd=addpredict
	if [[ $1 == '-w' ]]; then
		sandbox_cmd=addwrite
	fi

	local i
	for i in /dev/kfd /dev/dri/render* /dev/accel/accel*; do
		# An unmatched glob is left as the literal pattern (unless nullglob
		# is set); the -c test filters that out along with absent devices.
		[[ -c ${i} ]] || continue
		"${sandbox_cmd}" "${i}"
	done
}
|
||||
|
||||
# @FUNCTION: check_amdgpu
|
||||
# @USAGE: check_amdgpu
|
||||
# @DESCRIPTION:
|
||||
# grant and check read-write permissions on AMDGPU devices, die if not available.
|
||||
# Grant and check read-write permissions on AMDGPU and AMDNPU devices.
|
||||
# Die if no AMDGPU devices are available.
|
||||
check_amdgpu() {
|
||||
for device in /dev/kfd /dev/dri/render*; do
|
||||
addwrite ${device}
|
||||
# Common case: no AMDGPU device or the kernel fusion driver is disabled in the kernel.
|
||||
if [[ ! -c /dev/kfd ]]; then
|
||||
eerror "Device /dev/kfd does not exist!"
|
||||
eerror "To proceed, you need to have an AMD GPU and have CONFIG_HSA_AMD set in your kernel config."
|
||||
die "/dev/kfd is missing"
|
||||
fi
|
||||
|
||||
local device
|
||||
for device in /dev/kfd /dev/dri/render* /dev/accel/accel*; do
|
||||
[[ ! -c ${device} ]] && continue
|
||||
|
||||
addwrite "${device}"
|
||||
if [[ ! -r ${device} || ! -w ${device} ]]; then
|
||||
eerror "Cannot read or write ${device}!"
|
||||
eerror "Make sure it is present and check the permission."
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user