diff --git a/README.md b/README.md
index 22deb92..d59c9e9 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# tuned-amdgpu
-Hacky solution to integrate AMDGPU power profile control in `tuned` with Ansible
+Hacky solution to integrate AMDGPU power control and overclocking in `tuned` with Ansible
Takes a list of existing `tuned` profiles and creates new ones based on them. These new profiles include AMDGPU power/clock parameters
@@ -10,7 +10,7 @@ $ grep -ls ^connected /sys/class/drm/*/status | grep -o card[0-9] | sort | uniq
card1
```
-_Warning_: This is only tested with `RX6000` series GPUs, it is probable that older AMD GPUs will not work properly. Use at your own risk!
+_Warning_: This is only tested with `RX6000` series GPUs; it is probable that other generations will *not* work properly. Use at your own risk!
## Profiles
@@ -19,10 +19,7 @@ An example of the output/provided profiles follow
| Output profile | Description |
|:---|---|
| `balanced-amdgpu-default` | Includes the (assumed) existing `balanced` tuned profile.
Only adjusts the GPU power limit (typically lower). Clocks/voltage curve remain the default. |
-| `desktop-amdgpu-VR` | Includes the (assumed) existing `desktop` tuned profile.
Adjusts the GPU power limit, clocks, _and_ the voltage curve.
Uses the predefined `VR` profile in the driver. See `/sys/class/drm/card*/device/pp_power_profile_mode` |
-| `latency-performance-amdgpu-custom` | Includes the existing `latency-performance` tuned profile.
Like the existing GPU profiles (eg: _VR)), this also adjusts the GPU power limit, clocks, _and_ the voltage curve.
This differs by using the `custom` profile in the driver. This opens up further tweaking of the power/clock heuristics through the driver (currently manual). see: [pp-dpm](https://docs.kernel.org/gpu/amdgpu/thermal.html#pp-dpm) |
-
-**Note**: This is non-exhaustive, see the variables `base_profiles` and `amdgpu_profiles` below for the (default) sources of the merged profile mapping
+| `desktop-amdgpu-overclock` | Includes the (assumed) existing `desktop` tuned profile.
Adjusts the GPU power limit, clocks, _and_ the voltage curve. |
## Notable variables
@@ -35,5 +32,4 @@ These are the variables you're likely to want to change. They are defined in [p
| gpumem_clock_static | Sets the _static_ memory clock for the GPU (in `MHz`). This is *not* the _effective_ data rate. That is a multiple of this depending on the type of VRAM.
To avoid flickering this does *not* change dynamically with load. | `1050`, results in just over `1GHz`; mild overclock
Actual effective clock depends on this being multiplied against the data/pump rate of the `GDDR?` GPU memory |
| gpu_mv_offset | GPU core voltage offset. Takes +/- some integer in millivolts. Can be used to both over _and_ under volt. | `-50` (undervolt `50mV` or `0.05V`) |
| base_profiles | List of base tuned profiles to clone in the new AMDGPU profiles. Defaults based on `Fedora` |
default:| - +| gpu_power_multi | Dictionary with two keys, `default` and `overclock`. Expects two floats to set a power limit relative to the board _capability_. Example: `1.0` is full board capability, `0.5` is 50%.| diff --git a/playbook.yml b/playbook.yml index d47f7bd..e710f7b 100644 --- a/playbook.yml +++ b/playbook.yml @@ -8,10 +8,13 @@ # note: 'gpu_*' vars only apply with the 'custom' suffixed profiles created by this tooling # profiles based on the 'default' amdgpu power profile mode use default clocks gpu_clock_min: "500" # default 500 - gpu_clock_max: "2600" # default 2529 + gpu_clock_max: "2615" # default 2529 gpumem_clock_static: "1075" + gpu_power_multi: + default: 0.789473684210526 # 255W - slightly reduced, maintains clocks well + overclock: 0.869969040247678 # 281W - real default, board supports up to 323W (1.0) # optional, applies offset (+/-) to GPU voltage by provided mV - gpu_mv_offset: "-50" + # gpu_mv_offset: "-25" # '-50' undervolts GPU core voltage 50mV or 0.05V # # list of source tuned profiles available on Fedora (TODO: should dynamically discover) @@ -29,24 +32,24 @@ # ref: https://www.kernel.org/doc/html/v4.20/gpu/amdgpu.html#pp-power-profile-mode # 'pwr_cap_multi' is multiplied against board *limit* to determine profile wattage; 0.5 = 50% # values below reflect my 6900XT - amdgpu_profiles: - default: - pwrmode: 0 - pwr_cap_multi: 0.789473684210526 # 255W - slightly reduced, maintains clocks well - 3D: - pwrmode: 1 - pwr_cap_multi: 0.869969040247678 # 281W - default - powersave: - pwrmode: 2 - pwr_cap_multi: 0.869969040247678 - VR: - pwrmode: 4 - pwr_cap_multi: 0.869969040247678 - compute: - pwrmode: 5 - pwr_cap_multi: 0.869969040247678 - custom: - pwrmode: 6 - pwr_cap_multi: 1.0 # 323W - full capability +# amdgpu_profiles: +# default: +# pwrmode: 0 +# pwr_cap_multi: 0.789473684210526 # 255W - slightly reduced, maintains clocks well +# 3D: +# pwrmode: 1 +# pwr_cap_multi: 0.869969040247678 # 281W - default +# powersave: +# pwrmode: 2 +# 
pwr_cap_multi: 0.869969040247678 +# VR: +# pwrmode: 4 +# pwr_cap_multi: 0.869969040247678 +# compute: +# pwrmode: 5 +# pwr_cap_multi: 0.869969040247678 +# custom: +# pwrmode: 6 +# pwr_cap_multi: 1.0 # 323W - full capability # both dictionaries are merged to create new 'tuned' profiles. eg: # 'balanced-amdgpu-default', 'balanced-amdgpu-3D', 'balanced-amdgpu-video' diff --git a/roles/tuned_amdgpu/defaults/main.yml b/roles/tuned_amdgpu/defaults/main.yml index de80a28..aa86c15 100644 --- a/roles/tuned_amdgpu/defaults/main.yml +++ b/roles/tuned_amdgpu/defaults/main.yml @@ -8,8 +8,20 @@ board_watts: "{{ power_max | int / 1000000 }}" # internals for profile power calculations # item in the context of the with_nested loops in the play -profile_name: "{{ item.0.key }}" -profile_percentage: "{{ (item.0.value.pwr_cap_multi * 100.0) | round(2) }}" -profile_multi: "{{ item.0.value.pwr_cap_multi }}" -profile_microwatts: "{{ power_max | float * profile_multi | float }}" -profile_watts: "{{ profile_microwatts | int / 1000000 }}" +profile_name: "{{ item.0 }}" + +# determine percentage for human-friendly comments +power_default_pct: "{{ (gpu_power_multi.default * 100.0) | round(2) }}" +power_oc_pct: "{{ (gpu_power_multi.overclock * 100.0) | round(2) }}" + +# in microWatts, actually written to sysfs +power_default_mw: "{{ (power_max | float) * (gpu_power_multi.default | float) }}" +power_oc_mw: "{{ (power_max | float) * (gpu_power_multi.overclock | float) }}" + +# wattages - more human-friendly comments +power_default_watts: "{{ (power_default_mw | int) / 1000000 }}" +power_oc_watts: "{{ (power_oc_mw | int) / 1000000 }}" + +amdgpu_profiles: + - default + - overclock diff --git a/roles/tuned_amdgpu/tasks/main.yml b/roles/tuned_amdgpu/tasks/main.yml index 5f93274..c428dda 100644 --- a/roles/tuned_amdgpu/tasks/main.yml +++ b/roles/tuned_amdgpu/tasks/main.yml @@ -63,22 +63,22 @@ - name: Create custom profile directories ansible.builtin.file: state: directory - path: /etc/tuned/{{ item.1 
}}-amdgpu-{{ item.0.key }} + path: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }} mode: "0755" with_nested: - - "{{ lookup('dict', amdgpu_profiles) }}" + - "{{ amdgpu_profiles }}" - "{{ base_profiles }}" become: true - name: Template AMDGPU control/reset scripts ansible.builtin.template: src: templates/amdgpu-clock.sh.j2 - dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/amdgpu-clock.sh + dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/amdgpu-clock.sh owner: root group: root mode: "0755" with_nested: - - "{{ lookup('dict', amdgpu_profiles) }}" + - "{{ amdgpu_profiles }}" - "{{ base_profiles }}" notify: Restart tuned become: true @@ -86,12 +86,12 @@ - name: Template custom tuned profiles ansible.builtin.template: src: templates/tuned.conf.j2 - dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/tuned.conf + dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/tuned.conf owner: root group: root mode: "0644" with_nested: - - "{{ lookup('dict', amdgpu_profiles) }}" + - "{{ amdgpu_profiles }}" - "{{ base_profiles }}" notify: Restart tuned become: true diff --git a/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2 b/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2 index 90c5f0b..28af80a 100644 --- a/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2 +++ b/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2 @@ -20,9 +20,6 @@ CARD=$(/usr/bin/grep -ls ^connected /sys/class/drm/*/status | /usr/bin/grep -o ' {# begin the templated script for 'default' profiles to reset state #} {% if 'default' in profile_name %} -# set power state transition heuristics to default -echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode - # set control mode back to auto # attempts to dynamically set optimal power profile for (load) conditions echo 'auto' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level @@ -30,23 +27,20 @@ echo 'auto' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_le # reset any existing profile clock 
changes echo 'r' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage -# give '{{ profile_name }}' profile ~{{ profile_percentage }}% (rounded) of the max power capability -# {{ profile_watts }} Watts of {{ board_watts }} total -echo '{{ profile_microwatts | int }}' | tee '{{ powercap_set.files.0.path }}' +# give '{{ profile_name }}' profile ~{{ power_default_pct }}% (rounded) of the max power capability +# {{ power_default_watts }} Watts of {{ board_watts }} total +echo '{{ power_default_mw | int }}' | tee '{{ powercap_set.files.0.path }}' {% else %} {# begin the templated script for non-default AMD GPU profiles, eg: 'VR' or '3D_FULL_SCREEN' #} # set manual control mode # allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files # only interested in 'pp_power_profile_mode' for power and 'pp_dpm_mclk' for memory clock (flickering). # GPU clocks are dynamic based on (load) condition -echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level +#echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level -# set power state transition heuristics to '{{ profile_name }}' profile -echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode - -# give '{{ profile_name }}' profile ~{{ profile_percentage }}% (rounded) of the max power capability -# {{ profile_watts }} Watts of {{ board_watts }} total -echo '{{ profile_microwatts | int }}' | tee '{{ powercap_set.files.0.path }}' +# give '{{ profile_name }}' profile ~{{ power_oc_pct }}% (rounded) of the max power capability +# {{ power_oc_watts }} Watts of {{ board_watts }} total +echo '{{ power_oc_mw | int }}' | tee '{{ powercap_set.files.0.path }}' # set the minimum GPU clock echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage @@ -54,7 +48,8 @@ echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_v # set the maximum GPU 
clock echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage -# set the maximum GPU *memory* clock +# set the minimum / maximum GPU *memory* clock - force it high +echo 'm 0 {{ gpumem_clock_static }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage echo 'm 1 {{ gpumem_clock_static }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage {% if gpu_mv_offset is defined %} @@ -68,5 +63,9 @@ echo 'c' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage # force GPU memory into highest clock (fix flickering) # pp_dpm_*clk settings are unintuitive, giving profiles that may be used # opt not to set the others (eg: sclk/fclk) - those should remain for benefits from the curve -echo '3' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_mclk +# echo '3' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_mclk + +# note 4/8/2023: instead of 'manual'... deal with broken power management, force clocks to high +# ref: https://gitlab.freedesktop.org/drm/amd/-/issues/1500 +echo 'high' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level {% endif %} diff --git a/roles/tuned_amdgpu/templates/tuned.conf.j2 b/roles/tuned_amdgpu/templates/tuned.conf.j2 index 7ed559b..ae98cd9 100644 --- a/roles/tuned_amdgpu/templates/tuned.conf.j2 +++ b/roles/tuned_amdgpu/templates/tuned.conf.j2 @@ -1,8 +1,10 @@ [main] include={{ item.1 }} -summary={{ item.1 }} + TCP/RAID tweaks + AMDGPU pp_power_profile_mode = {{ item.0.value.pwrmode }} ({{ item.0.key }}) +summary={{ item.1 }} + TCP/RAID tweaks + AMDGPU {{ item.0 }} [sysctl] +# allow regular users to see the kernel ring buffer +kernel.dmesg_restrict=0 net.core.default_qdisc=fq # 'bbr2' requires a [modified] supporting kernel - stock Fedora kernels do *not* support it (currently) # eg: 'kernel-xanmode-edge' from COPR 'rmnscnce/kernel-xanmod' @@ -25,4 +27,6 @@ script=${i:PROFILE_DIR}/amdgpu-clock.sh [ssdnosched] type=disk devices_udev_regex=(ID_ATA_ROTATION_RATE_RPM=0) -elevator=none +# elevator=none 
+elevator=kyber +# elevator=mq-deadline
pwrmode: 0
pwr_cap_multi: 0.75
# 75% relatively safe default
VR:
pwrmode: 4
pwr_cap_multi: 0.8
# 80%, likely slight boost
custom:
pwrmode: 6
pwr_cap_multi: 1.0
# 100%, full GPU board capability
# warning: significantly increased heat