control AMDGPU clocks, simplify to 2 profiles: default/custom

This commit is contained in:
Josh Lay 2022-06-04 11:38:40 -05:00
parent b22c8f5d9b
commit c2367b2dc7
Signed by: jlay
GPG key ID: B265E45CACAD108A
4 changed files with 103 additions and 27 deletions

View file

@ -8,9 +8,14 @@
power_cap_half_float: "{{ power_max |float * 0.5 }}"
power_cap: "{{ power_cap_float |int }}"
power_cap_half: "{{ power_cap_half_float |int }}" # used to limit GPU power to 50% on 'low' perf modes
gpu_clock_min: "2300" # minimum GPU clock in MHz (in 3D) - default is 500 MHz
gpu_clock_max: "2600" # maximum GPU clock in MHz (also 3D) - range allows up to 3000 MHz, default is 2529
gpumem_clock_max: "1075" # maximum GPU memory clock in MHz - default is 1000 MHz, range allows 1075 MHz
# note: all of the clock values above are based on my non-reference 6900XT
# consult '/sys/class/drm/{{ card }}/device/pp_od_clk_voltage' for the values of your own card
card: card0 # DRM card name used in the /sys/class/drm/ paths; defaults to card0
base_profiles: # standard tuned profiles available on Fedora, should dynamically discover?
- balanced
base_profiles: # list of source tuned profiles available on Fedora (TODO: should dynamically discover?)
- balanced # these are further modified with secondary (templated) profiles, see 'templates/tuned.conf.j2'
- desktop
- latency-performance
- network-latency
@ -18,12 +23,7 @@
- powersave
- virtual-host
amdgpu_profiles: # statically defined mapping of the contents in /sys/class/drm/{{ card }}/device/pp_power_profile_mode
# - { name: 'bootup_default', value: 0 }
- { name: '3D', value: 1 }
- { name: 'powersaving', value: 2 }
- { name: 'video', value: 3 }
- { name: 'VR', value: 4 }
# - { name: 'compute', value: 5 }
- { name: 'default', value: 0 }
- { name: 'custom', value: 6 }
handlers:
- name: restart tuned
@ -56,6 +56,28 @@
with_nested:
- "{{ amdgpu_profiles }}"
- "{{ base_profiles }}"
# Render the clock-control script into every '<base profile>-amdgpu-custom'
# tuned profile directory (item.0 is 'custom', item.1 walks base_profiles).
# The 'custom' tuned.conf runs the script from its own profile directory.
- name: template AMDGPU clock control scripts (tuned profile dependency)
  template:
    dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/amdgpu-clock.sh
    src: templates/amdgpu-clock.sh.j2
    mode: "0755"
    owner: root
    group: root
  notify: restart tuned
  with_nested:
    - ['custom']
    - "{{ base_profiles }}"
# Render the clock *reset* script into every '<base profile>-amdgpu-default'
# tuned profile directory (item.0 is 'default', item.1 walks base_profiles).
# The 'default' tuned.conf runs the script from its own profile directory.
- name: template AMDGPU clock control *reset* script (tuned profile dependency)
  template:
    dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/amdgpu-clock-reset.sh
    src: templates/amdgpu-clock-reset.sh.j2
    mode: "0755"
    owner: root
    group: root
  notify: restart tuned
  with_nested:
    - ['default']
    - "{{ base_profiles }}"
- name: template custom tuned profiles
template:
src: templates/tuned.conf.j2

View file

@ -0,0 +1,12 @@
#!/bin/bash
# Hand AMDGPU clock control back to its defaults (run by tuned as a profile script).
#
# Ansible fills in the card name (eg: card0) when it renders this template.

# performance level selection goes back to 'auto'
tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level <<< 'auto'
# 'r' discards clock changes left behind by a previously applied profile
tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage <<< 'r'

View file

@ -0,0 +1,33 @@
#!/bin/bash
# script for tuned AMDGPU clock control
# clocks in 3D usage are dynamic based on need/usage;
# this sets the minimums / maximums
#
# rendered by Ansible with environment-appropriate values:
#   card #, eg: card0
#   min/max GPU clocks and the max GPU memory clock
#
# usage: amdgpu-clock.sh [start|stop|verify]
#   start  - apply the clock settings (also the default when no argument is given)
#   stop   - revert them, so that unloading the tuned profile rolls the GPU back
#   verify - no-op; nothing is checked and success is reported
# tuned's 'script' plugin passes the action as the first argument. Before this
# dispatch existed every action re-applied the clocks, including 'stop'.

# apply the custom clock limits
apply_clocks() {
    # send a reset for a clean slate (currently disabled)
    # echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
    # set manual control mode
    echo 'manual' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
    # set the minimum GPU clock
    echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
    # set the maximum GPU clock
    echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
    # set the maximum GPU *memory* clock
    echo 'm 1 {{ gpumem_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
    # boost offset voltage 100mV / 0.1V
    echo 'vo +100' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
    # commit the changes
    echo 'c' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
    # force GPU memory into highest profile
    echo '3' | tee /sys/class/drm/{{ card }}/device/pp_dpm_mclk
}

# undo apply_clocks; same sequence as the amdgpu-clock-reset.sh template
revert_clocks() {
    # set control mode back to auto
    echo 'auto' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
    # reset the clock/voltage changes made by apply_clocks
    echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
}

case "${1:-start}" in
    start)
        apply_clocks
        ;;
    stop)
        revert_clocks
        ;;
    verify)
        # no verification implemented; succeed without touching the GPU
        ;;
    *)
        # unknown action: leave the GPU alone rather than re-applying clocks
        echo "amdgpu-clock.sh: ignoring unknown action '$1'" >&2
        ;;
esac

View file

@ -2,25 +2,6 @@
include={{ item.1 }}
summary={{ item.1 }} + TCP/RAID tweaks + AMDGPU pp_power_profile_mode = {{ item.0.value }} ({{ item.0.name }})
[sysfs]
/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
{% if 'VR' in item.0.name or '3D' in item.0.name or 'compute' in item.0.name or 'custom' in item.0.name %}
{# TODO: if 'custom' profile: #}
{# set 'power_dpm_force_performance_level' to manual #}
{# set individual clocks (eg: pp_dpm_mclk/pp_dpm_sclk/pp_dpm_pcie) #}
{# with user-provided values for those clocks #}
# configure GPU power/clock characteristics
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
/sys/class/drm/{{ card }}/device/power_dpm_force_performance_level = high
# limit perf profiles to {{ power_max_multi * 100.0 |int }}% of the max power capability
/sys/class/drm/{{ card }}/device/hwmon/hwmon9/power1_cap = {{ power_cap }}
{% else %}
# choose power saving dpm clock options
/sys/class/drm/{{ card }}/device/power_dpm_force_performance_level = low
# limit lower power modes to 50% of the max power capability
/sys/class/drm/{{ card }}/device/hwmon/hwmon9/power1_cap = {{ power_cap_half }}
{% endif %}
[sysctl]
net.core.default_qdisc=fq
# 'bbr2' requires a [modified] supporting kernel - stock Fedora kernels do *not* support it (currently)
@ -32,3 +13,31 @@ dev.raid.speed_limit_min=600000
dev.raid.speed_limit_max=9000000
# allow some games to run (eg: DayZ)
vm.max_map_count=1048576
{% if 'default' in item.0.name %}
# reference/execute AMDGPU clock control *reset* script
[gpuresetscript]
type=script
script=${i:PROFILE_DIR}/amdgpu-clock-reset.sh
[sysfs]
# configure GPU power/clock characteristics
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
/sys/class/drm/{{ card }}/device/power_dpm_force_performance_level = auto
# give default profile {{ (power_max_multi | float * 50.0) | round | int }}% (max) power capability
# the hwmon index is not stable across boots/hardware, so match it with a
# wildcard (tuned's sysfs plugin accepts shell-style wildcards in paths)
/sys/class/drm/{{ card }}/device/hwmon/hwmon*/power1_cap = {{ power_cap_half }}
{% endif %}
{% if 'custom' in item.0.name %}
[sysfs]
# configure GPU power/clock characteristics
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
# give this custom oriented profile {{ (power_max_multi | float * 100.0) | round | int }}% of the power capability
# the hwmon index is not stable across boots/hardware, so match it with a
# wildcard (tuned's sysfs plugin accepts shell-style wildcards in paths)
/sys/class/drm/{{ card }}/device/hwmon/hwmon*/power1_cap = {{ power_cap }}
# reference/execute AMDGPU clock control script
[gpuclockscript]
type=script
script=${i:PROFILE_DIR}/amdgpu-clock.sh
{% endif %}