diff --git a/host_vars/localhost.yml b/host_vars/localhost.yml index 9fd8f26..9451555 100644 --- a/host_vars/localhost.yml +++ b/host_vars/localhost.yml @@ -2,7 +2,7 @@ # the profile tries to find the card with displays attached to apply these settings. # configuration of many GPUs not yet supported, one is assumed tuned_amdgpu_clock_min: "500" -tuned_amdgpu_clock_max: "2725" +tuned_amdgpu_clock_max: "2715" tuned_amdgpu_memclock_static: "1075" tuned_amdgpu_power_multi_def: 0.869969040247678 # 281W - real default tuned_amdgpu_power_multi_oc: 1.0 # full board power capability @@ -11,25 +11,16 @@ tuned_amdgpu_power_multi_oc: 1.0 # full board power capability # 310: 0.959752321981424 # sample worksheet in 'power_max multi tab calculator.ods' -tuned_amdgpu_mv_offset: "+70" # add 70mV / 0.07V +tuned_amdgpu_mv_offset: "+45" # add 45mV / 0.045V # '-50' undervolts GPU core voltage 50mV / 0.05V; warning: here be dragons/instability # 'tuned' plugins - used to set the kernel cmdline via bootloader... and sysctl tunables tuned_amdgpu_plugins: # ref: https://github.com/redhat-performance/tuned/tree/master/tuned/plugins - rt_gamescope: - type: script - script: '/etc/tuned/rt_gamescope.sh' # ensures gamescope has proper capability when profile is loaded - cpu: - boost: 1 - governor: performance - energy_perf_bias: performance bootloader: # 'cmdline' allows entries w/ a suffix, names should be unique across *all* profiles. values accept +/- operators - cmdline_amdgpu_general: "delayacct nowatchdog amdgpu.ppfeaturemask=0xfff7ffff" - # cmdline_kvm: 'kvm.ignore_msrs=1 kvm_amd.npt=1' - cmdline_amdgpu_kvm: 'kvm_amd.npt=1' + cmdline_amdgpu_general: "delayacct nowatchdog kvm.ignore_msrs=1 kvm_amd.npt=1 amdgpu.ppfeaturemask=0xfff7ffff" cmdline_amdgpu_hugepages: "default_hugepagesz=1G hugepagesz=1G hugepages=16" cmdline_amdgpu_iommu: "amd_iommu=on iommu=pt" - cmdline_amdgpu_nvme: "nvme_core.default_ps_max_latency_us=0" + cmdline_amdgpu_devs: "nvme_core.default_ps_max_latency_us=0 pci=realloc=off" sysctl: # quote ints/cast to string to avoid surprises kernel.dmesg_restrict: '0' # allow regular users to see the kernel ring buffer # net.core.default_qdisc: fq # congestion control diff --git a/roles/tuned_amdgpu/files/rt_gamescope.sh b/roles/tuned_amdgpu/files/rt_gamescope.sh deleted file mode 100755 index 82ad68b..0000000 --- a/roles/tuned_amdgpu/files/rt_gamescope.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# -# script run by 'tuned' to ensure gamescope has the proper capability - -# Check for arguments -if [ $# -eq 0 ]; then - echo "Usage: $0 {verify|start}" - exit 1 -fi - -function verify_cap() { - /usr/sbin/getcap "$(which gamescope)" |& grep 'sys_nice=eip' - return $? -} - -function set_cap() { - /usr/sbin/setcap 'CAP_SYS_NICE=eip' "$(which gamescope)" -} - -# Handle arguments -case "$1" in - verify) - verify_cap - ;; - start) - set_cap - ;; - *) - echo "Invalid argument. Use 'verify' or 'start'." - exit 1 - ;; -esac - diff --git a/roles/tuned_amdgpu/tasks/main.yml b/roles/tuned_amdgpu/tasks/main.yml index 9518ae3..d22e160 100644 --- a/roles/tuned_amdgpu/tasks/main.yml +++ b/roles/tuned_amdgpu/tasks/main.yml @@ -22,12 +22,6 @@ state: present become: true -- name: Ensure 'tuned-ppd' is absent - ansible.builtin.package: - name: tuned-ppd - state: absent - become: true - - name: Find bundled 'tuned' profiles ansible.builtin.find: paths: @@ -91,15 +85,6 @@ notify: Restart tuned become: true -- name: Copy gamescope RT capability script - ansible.builtin.copy: - src: rt_gamescope.sh - dest: /etc/tuned/rt_gamescope.sh - owner: root - group: root - mode: '0755' - become: true - - name: Template tuned.conf for custom profiles ansible.builtin.template: src: templates/tuned.conf.j2 diff --git a/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 b/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 index b7fca2d..e15e5f7 100644 --- a/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 +++ b/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 @@ -9,8 +9,6 @@ # Variables shown below named '$TUNED_...' are given values by '/etc/tuned/amdgpu-profile-vars.conf' # # determine the connected GPU using the DRM subsystem. FIXME: assumes one card, make configurable -# TODO: break this out into a role var; assume renderD128 -# allow/use many configs CARD=$(/usr/bin/grep -ls ^connected /sys/class/drm/*/status | /usr/bin/grep -o 'card[0-9]' | /usr/bin/sort | /usr/bin/uniq | /usr/bin/sort -h | /usr/bin/tail -1) function get_hwmon_dir() { @@ -23,6 +21,7 @@ function get_hwmon_dir() { done } + # determine the hwmon directory HWMON_DIR=$(get_hwmon_dir "${CARD}") @@ -52,16 +51,11 @@ function amdgpu_profile_reset() { echo "${PROF_DEFAULT_NUM}" | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode # delay before returning - have mercy, may be followed with other profile function calls - sleep 0.25 + sleep 0.5 } function amdgpu_profile_overclock() { - # set manual control mode - # allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files - echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level - - # force GPU core and memory into highest clocks (fix flickering and poor power management) echo "s 0 ${TUNED_tuned_amdgpu_clock_min}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage echo "s 1 ${TUNED_tuned_amdgpu_clock_max}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage echo "m 1 ${TUNED_tuned_amdgpu_memclock_static}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage @@ -74,6 +68,11 @@ function amdgpu_profile_overclock() { # commit the changes echo 'c' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage + # force GPU core and memory into highest clocks (fix flickering and poor power management) + # set manual control mode + # allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files + echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level + # adjust power limit using multiplier against board capability POWER_LIM_OC=$(/usr/bin/awk -v m="$POWER_CAP" -v n="${TUNED_tuned_amdgpu_power_multi_oc}" 'BEGIN {printf "%.0f", (m*n)}') echo "$POWER_LIM_OC" | tee "${HWMON_DIR}/power1_cap" @@ -89,7 +88,7 @@ function amdgpu_profile_overclock() { echo "${PROF_VR_NUM}" | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode # delay before returning - have mercy, may be followed with other profile function calls - sleep 0.25 + sleep 0.5 }