diff --git a/README.md b/README.md index efad23b..052e77f 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@ Hacky solution to integrate AMDGPU power/clock control into `tuned` profiles with Ansible. -Provides three variations of the `tuned` profiles found in `/usr/lib/tuned` +Extends every `tuned` profile found in `/usr/lib/tuned` using the [AMDGPU hwmon interfaces](https://docs.kernel.org/gpu/amdgpu/thermal.html): -- `default`: the out-of-the-box configuration -- `overclock`: the optimized card configuration. Includes all of the clock/voltage/power settings -- `peak`: the same as `overclock`, but with clock gating removed. May help profiling +- `default`: the out-of-the-box GPU clock/power configuration +- `overclock`: the _optimized_ card configuration. Includes the clock/voltage/power settings outlined below. +- `peak`: the same as `overclock`, but with clock gating removed. May help profiling. -Contrary to the name, `overclock` can be used to de-tune the card as well. +Contrary to the name, the `overclock` profiles can be used to de-tune the card as well. _Warning_: This is only tested with `RX6000` series GPUs, others may _not_ work properly. Use at your own risk! @@ -34,15 +34,14 @@ The playbook will render/make effective this config file: `/etc/tuned/amdgpu-pro Here is a preview: ```ini -gpu_clock_min=500 -gpu_clock_max=2715 -gpumem_clock_static=1075 -gpu_power_multi_def=0.869969040247678 -gpu_power_multi_oc=1.0 -gpu_mv_offset=+60 +tuned_amdgpu_clock_min=500 +tuned_amdgpu_clock_max=2715 +tuned_amdgpu_memclock_static=1075 +tuned_amdgpu_power_multi_def=0.869969040247678 +tuned_amdgpu_power_multi_oc=1.0 +tuned_amdgpu_mv_offset=+60 ``` - -Changes outside of _Ansible_ are not immediately effective. Switching `tuned` profiles or restarting the service would be required. +These are the result of [Variables](#Variables) below; changes outside of _Ansible_ are not immediately effective. Switching `tuned` profiles or restarting the service would be required. 
One can use `gamemode` for dynamic switching. Sample `~/.config/gamemode.ini` below: @@ -60,9 +59,9 @@ These are the variables you'll want to change/consider. | Variable | Description | |------------------------|---------------------------------------------------------------------------------------| -| gpu_clock_min | Sets the min (dynamic) GPU clock (in `Mhz`) for the non-default `amdgpu` profiles | -| gpu_clock_max | Sets the max (dynamic) GPU clock (in `MHz`) for the non-default `amdgpu` profiles | -| gpumem_clock_static | Sets the _static_ memory clock for the GPU (in `MHz`). This is *not* the _effective_ data rate. _That_ would be a multiple of _this_ depending on the type of VRAM.

To avoid flickering this is *not* allowed to change with load, only between `default` and `overclock`/`peak` profiles. | -| gpu_mv_offset | GPU core voltage offset. Takes +/- some integer in millivolts. Can be used to both over _and_ under volt. eg: `-50` _(undervolt `50mV` or `0.05V`)_ | -| gpu_power_multi_def | Float between `0.0` and `1.0`; controls power limit relative to the board _capability_. For _'default'_-named power profiles. | -| gpu_power_multi_oc | Similar to `gpu_power_multi_def`, for _'overclock'_-named power profiles. | +| `tuned_amdgpu_clock_min` | Sets the min (dynamic) GPU clock (in `MHz`) for the non-default `amdgpu` profiles | +| `tuned_amdgpu_clock_max` | Sets the max (dynamic) GPU clock (in `MHz`) for the non-default `amdgpu` profiles | +| `tuned_amdgpu_memclock_static` | Sets the _static_ memory clock for the GPU (in `MHz`). This is *not* the _effective_ data rate. _That_ would be a multiple of _this_ depending on the type of VRAM.

To avoid flickering this is *not* allowed to change with load, only between `default` and `overclock`/`peak` profiles. | +| `tuned_amdgpu_mv_offset` | GPU core voltage offset. Takes +/- some integer in millivolts. Can be used to both over _and_ under volt. eg: `-50` _(undervolt `50mV` or `0.05V`)_ | +| `tuned_amdgpu_power_multi_def` | Float between `0.0` and `1.0`; controls power limit relative to the board _capability_. For _'default'_-named power profiles. | +| `tuned_amdgpu_power_multi_oc` | Similar to `tuned_amdgpu_power_multi_def`, for the resulting `overclock` and `peak` power profiles. | diff --git a/host_vars/localhost.yml b/host_vars/localhost.yml index 8687151..9451555 100644 --- a/host_vars/localhost.yml +++ b/host_vars/localhost.yml @@ -1,26 +1,21 @@ --- # the profile tries to find the card with displays attached to apply these settings. # configuration of many GPUs not yet supported, one is assumed -gpu_clock_min: "500" -gpu_clock_max: "2715" -gpumem_clock_static: "1075" -gpu_power_multi_def: 0.869969040247678 # 281W - real default -gpu_power_multi_oc: 1.0 # full board power capability +tuned_amdgpu_clock_min: "500" +tuned_amdgpu_clock_max: "2715" +tuned_amdgpu_memclock_static: "1075" +tuned_amdgpu_power_multi_def: 0.869969040247678 # 281W - real default +tuned_amdgpu_power_multi_oc: 1.0 # full board power capability # other multipliers for 323W boards like mine: # 300W: 0.928792569659443 # 310: 0.959752321981424 # sample worksheet in 'power_max multi tab calculator.ods' -gpu_mv_offset: "+25" # add 25mV or 0.025V -# gpu_mv_offset: "+75" # add 75mV or 0.075V -# gpu_mv_offset: "+150" # add 150mV or 0.15V -# gpu_mv_offset: "+133" # add 133mV or 0.133V -# gpu_mv_offset: "+75" # add 75mV or 0.075V -# gpu_mv_offset: "+125" # add 125mV or 0.125V -# '-50' undervolts GPU core voltage 50mV or 0.05V; untested - here be dragons/instability +tuned_amdgpu_mv_offset: "+45" # add 45mV / 0.045V +# '-50' undervolts GPU core voltage 50mV / 0.05V; warning: here be 
dragons/instability # 'tuned' plugins - used to set the kernel cmdline via bootloader... and sysctl tunables -plugins: # ref: https://github.com/redhat-performance/tuned/tree/master/tuned/plugins +tuned_amdgpu_plugins: # ref: https://github.com/redhat-performance/tuned/tree/master/tuned/plugins bootloader: # 'cmdline' allows entries w/ a suffix, names should be unique across *all* profiles. values accept +/- operators cmdline_amdgpu_general: "delayacct nowatchdog kvm.ignore_msrs=1 kvm_amd.npt=1 amdgpu.ppfeaturemask=0xfff7ffff" cmdline_amdgpu_hugepages: "default_hugepagesz=1G hugepagesz=1G hugepages=16" diff --git a/roles/tuned_amdgpu/tasks/main.yml b/roles/tuned_amdgpu/tasks/main.yml index 7910010..d22e160 100644 --- a/roles/tuned_amdgpu/tasks/main.yml +++ b/roles/tuned_amdgpu/tasks/main.yml @@ -57,12 +57,12 @@ mode: '0644' when: vars[item] is defined with_items: - - gpu_clock_min - - gpu_clock_max - - gpumem_clock_static - - gpu_power_multi_def - - gpu_power_multi_oc - - gpu_mv_offset + - tuned_amdgpu_clock_min + - tuned_amdgpu_clock_max + - tuned_amdgpu_memclock_static + - tuned_amdgpu_power_multi_def + - tuned_amdgpu_power_multi_oc + - tuned_amdgpu_mv_offset become: true - name: Create custom profile directories diff --git a/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 b/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 index 57af6b6..e15e5f7 100644 --- a/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 +++ b/roles/tuned_amdgpu/templates/amdgpu-profile.sh.j2 @@ -41,7 +41,7 @@ function amdgpu_profile_reset() { echo 'r' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage # adjust power limit using multiplier against board capability - POWER_LIM_DEFAULT=$(/usr/bin/awk -v m="$POWER_CAP" -v n="${TUNED_gpu_power_multi_def}" 'BEGIN {printf "%.0f", (m*n)}') + POWER_LIM_DEFAULT=$(/usr/bin/awk -v m="$POWER_CAP" -v n="${TUNED_tuned_amdgpu_power_multi_def}" 'BEGIN {printf "%.0f", (m*n)}') echo "$POWER_LIM_DEFAULT" | tee "${HWMON_DIR}/power1_cap" # extract the 
power-saving profile ID number @@ -56,13 +56,13 @@ function amdgpu_profile_reset() { } function amdgpu_profile_overclock() { - echo "s 0 ${TUNED_gpu_clock_min}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage - echo "s 1 ${TUNED_gpu_clock_max}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage - echo "m 1 ${TUNED_gpumem_clock_static}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage + echo "s 0 ${TUNED_tuned_amdgpu_clock_min}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage + echo "s 1 ${TUNED_tuned_amdgpu_clock_max}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage + echo "m 1 ${TUNED_tuned_amdgpu_memclock_static}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage # under/over-voltage is considered optional or less likely to be defined, checked before use - if [[ -n ${TUNED_gpu_mv_offset} ]]; then - echo "vo ${TUNED_gpu_mv_offset}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage + if [[ -n ${TUNED_tuned_amdgpu_mv_offset} ]]; then + echo "vo ${TUNED_tuned_amdgpu_mv_offset}" | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage fi # commit the changes @@ -74,7 +74,7 @@ function amdgpu_profile_overclock() { echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level # adjust power limit using multiplier against board capability - POWER_LIM_OC=$(/usr/bin/awk -v m="$POWER_CAP" -v n="${TUNED_gpu_power_multi_oc}" 'BEGIN {printf "%.0f", (m*n)}') + POWER_LIM_OC=$(/usr/bin/awk -v m="$POWER_CAP" -v n="${TUNED_tuned_amdgpu_power_multi_oc}" 'BEGIN {printf "%.0f", (m*n)}') echo "$POWER_LIM_OC" | tee "${HWMON_DIR}/power1_cap" # avoid display flickering, force OC'd memory to highest clock diff --git a/roles/tuned_amdgpu/templates/tuned.conf.j2 b/roles/tuned_amdgpu/templates/tuned.conf.j2 index 3ddf2f0..25dec25 100644 --- a/roles/tuned_amdgpu/templates/tuned.conf.j2 +++ b/roles/tuned_amdgpu/templates/tuned.conf.j2 @@ -14,8 +14,8 @@ include=/etc/tuned/amdgpu-profile-vars.conf type=script script={{ 
(tuned_amdgpu_profile_dir, 'amdgpu-clock.sh') | ansible.builtin.path_join }} {# call the state-managing script with the selected profile, item.0, as an argument #} -{% if plugins is defined %} -{% for section, options in plugins.items() %} +{% if tuned_amdgpu_plugins is defined %} +{% for section, options in tuned_amdgpu_plugins.items() %} {#+ give each plugin section some space +#} [{{ section }}] {% for key, value in options.items() %}