From c2367b2dc767e9c3eacb736cbeb0c08ff0006cff Mon Sep 17 00:00:00 2001
From: Josh Lay
Date: Sat, 4 Jun 2022 11:38:40 -0500
Subject: [PATCH] control AMDGPU clocks, simplify to 2 profiles: default/custom

Replace the per-workload AMDGPU profile list with two entries
('default' = 0, 'custom' = 6) and template two helper scripts into the
matching tuned profile directories:

* amdgpu-clock.sh ('custom' profiles): switches
  power_dpm_force_performance_level to 'manual', writes the
  gpu_clock_min / gpu_clock_max / gpumem_clock_max limits and a voltage
  offset to pp_od_clk_voltage, commits them, then writes '3' to
  pp_dpm_mclk.
* amdgpu-clock-reset.sh ('default' profiles): switches
  power_dpm_force_performance_level back to 'auto' and writes 'r' to
  pp_od_clk_voltage.

tuned.conf.j2 runs the matching script through a 'type=script' section
and sets pp_power_profile_mode and power1_cap per profile:

* power1_cap is addressed as hwmon/hwmon*/power1_cap in the added
  lines; the hwmon index is not fixed, and tuned's sysfs plugin accepts
  shell-style wildcards in the path.
* The percentage shown in the rendered 'custom' comment is computed as
  (power_max_multi * 100) with explicit parentheses, because a Jinja
  filter binds tighter than '*' and 'x * 100.0 |int' only converts the
  literal.
* The 'default' comment states 50%, matching power_cap_half
  (power_max * 0.5) in playbook.yml.
---
Review notes (ignored by git-am):
* Line breaks and indentation were restored by convention; verify the
  YAML context lines against playbook.yml before applying.
* Some added lines differ from the original commit (see the message
  above), so the post-image blob ids on the 'index' lines are stale.

 playbook.yml                       | 38 +++++++++++++++++++-----
 templates/amdgpu-clock-reset.sh.j2 | 12 ++++++++
 templates/amdgpu-clock.sh.j2       | 33 +++++++++++++++++++++
 templates/tuned.conf.j2            | 47 ++++++++++++++++++------------
 4 files changed, 103 insertions(+), 27 deletions(-)
 create mode 100644 templates/amdgpu-clock-reset.sh.j2
 create mode 100644 templates/amdgpu-clock.sh.j2

diff --git a/playbook.yml b/playbook.yml
index 17c0708..c6ba71e 100644
--- a/playbook.yml
+++ b/playbook.yml
@@ -8,9 +8,14 @@
     power_cap_half_float: "{{ power_max |float * 0.5 }}"
     power_cap: "{{ power_cap_float |int }}"
     power_cap_half: "{{ power_cap_half_float |int }}" # used to limit GPU power to 50% on 'low' perf modes
+    gpu_clock_min: "2300" # minimum GPU clock (in 3D) - default 500MHz
+    gpu_clock_max: "2600" # maximum GPU clock (also 3D) - range allows up to 3000MHz. default 2529
+    gpumem_clock_max: "1075" # maximum GPU memory clock - default 1000MHz, range allows 1075MHz
+    # note: (all clocks based on my non-reference 6900XT)
+    # consult '/sys/class/drm/{{ card }}/device/pp_od_clk_voltage'
     card: card0 # default to card0
-    base_profiles: # standard tuned profiles available on Fedora, should dynamically discover?
-      - balanced
+    base_profiles: # list of source tuned profiles available on Fedora (TODO: should dynamically discover?)
+      - balanced # these are further modified with secondary (templated) profiles, see 'templates/tuned.conf.j2'
       - desktop
       - latency-performance
       - network-latency
@@ -18,12 +23,7 @@
       - powersave
       - virtual-host
     amdgpu_profiles: # statically defined mapping of the contents in /sys/class/drm/{{ card }}/device/pp_power_profile_mode
-#      - { name: 'bootup_default', value: 0 }
-      - { name: '3D', value: 1 }
-      - { name: 'powersaving', value: 2 }
-      - { name: 'video', value: 3 }
-      - { name: 'VR', value: 4 }
-#      - { name: 'compute', value: 5 }
+      - { name: 'default', value: 0 }
       - { name: 'custom', value: 6 }
   handlers:
     - name: restart tuned
@@ -56,6 +56,28 @@
       with_nested:
         - "{{ amdgpu_profiles }}"
         - "{{ base_profiles }}"
+    - name: template AMDGPU clock control scripts (tuned profile dependency)
+      template:
+        src: templates/amdgpu-clock.sh.j2
+        dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/amdgpu-clock.sh
+        owner: root
+        group: root
+        mode: "0755"
+      with_nested:
+        - 'custom'
+        - "{{ base_profiles }}"
+      notify: restart tuned
+    - name: template AMDGPU clock control *reset* script (tuned profile dependency)
+      template:
+        src: templates/amdgpu-clock-reset.sh.j2
+        dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/amdgpu-clock-reset.sh
+        owner: root
+        group: root
+        mode: "0755"
+      with_nested:
+        - 'default'
+        - "{{ base_profiles }}"
+      notify: restart tuned
     - name: template custom tuned profiles
       template:
         src: templates/tuned.conf.j2
diff --git a/templates/amdgpu-clock-reset.sh.j2 b/templates/amdgpu-clock-reset.sh.j2
new file mode 100644
index 0000000..5e9b34b
--- /dev/null
+++ b/templates/amdgpu-clock-reset.sh.j2
@@ -0,0 +1,12 @@
+#!/bin/bash
+# script to reset tuned's AMDGPU clock control to default
+#
+# rendered by Ansible with environment-appropriate values:
+# card #, eg: card0
+# (no clock values are needed for a reset)
+
+# set control mode back to auto
+echo 'auto' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
+
+# reset any existing profile clock changes
+echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
diff --git a/templates/amdgpu-clock.sh.j2 b/templates/amdgpu-clock.sh.j2
new file mode 100644
index 0000000..14bac41
--- /dev/null
+++ b/templates/amdgpu-clock.sh.j2
@@ -0,0 +1,33 @@
+#!/bin/bash
+# script for tuned AMDGPU clock control
+# clocks in 3D usage are dynamic based on need/usage
+#
+# this sets the minimums / maximums
+#
+# rendered by Ansible with environment-appropriate values:
+# card #, eg: card0
+# min/max GPU clocks, max GPU memory clock
+
+# send a reset for a clean slate
+# echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
+
+# set manual control mode
+echo 'manual' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
+
+# set the minimum GPU clock
+echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
+
+# set the maximum GPU clock
+echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
+
+# set the maximum GPU *memory* clock
+echo 'm 1 {{ gpumem_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
+
+# boost offset voltage 100mV / 0.1V
+echo 'vo +100' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
+
+# commit the changes
+echo 'c' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
+
+# force GPU memory into highest profile
+echo '3' | tee /sys/class/drm/{{ card }}/device/pp_dpm_mclk
diff --git a/templates/tuned.conf.j2 b/templates/tuned.conf.j2
index 1513389..90ff588 100644
--- a/templates/tuned.conf.j2
+++ b/templates/tuned.conf.j2
@@ -2,25 +2,6 @@
 include={{ item.1 }}
 summary={{ item.1 }} + TCP/RAID tweaks + AMDGPU pp_power_profile_mode = {{ item.0.value }} ({{ item.0.name }})
 
-[sysfs]
-/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
-{% if 'VR' in item.0.name or '3D' in item.0.name or 'compute' in item.0.name or 'custom' in item.0.name %}
-{# TODO: if 'custom' profile: #}
-{# set 'power_dpm_force_performance_level' to manual #}
-{# set individual clocks (eg: pp_dpm_mclk/pp_dpm_sclk/pp_dpm_pcie) #}
-{# with user-provided values for those clocks #}
-# configure GPU power/clock characteristics
-# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
-/sys/class/drm/{{ card }}/device/power_dpm_force_performance_level = high
-# limit perf profiles to {{ power_max_multi * 100.0 |int }}% of the max power capability
-/sys/class/drm/{{ card }}/device/hwmon/hwmon9/power1_cap = {{ power_cap }}
-{% else %}
-# choose power saving dpm clock options
-/sys/class/drm/{{ card }}/device/power_dpm_force_performance_level = low
-# limit lower power modes to 50% of the max power capability
-/sys/class/drm/{{ card }}/device/hwmon/hwmon9/power1_cap = {{ power_cap_half }}
-{% endif %}
-
 [sysctl]
 net.core.default_qdisc=fq
 # 'bbr2' requires a [modified] supporting kernel - stock Fedora kernels do *not* support it (currently)
@@ -32,3 +13,31 @@ dev.raid.speed_limit_min=600000
 dev.raid.speed_limit_max=9000000
 # allow some games to run (eg: DayZ)
 vm.max_map_count=1048576
+
+{% if 'default' in item.0.name %}
+# reference/execute AMDGPU clock control *reset* script
+[gpuresetscript]
+type=script
+script=${i:PROFILE_DIR}/amdgpu-clock-reset.sh
+
+[sysfs]
+# configure GPU power/clock characteristics
+# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
+/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
+/sys/class/drm/{{ card }}/device/power_dpm_force_performance_level = auto
+# give default profile 50% of the max power capability (power_cap_half)
+/sys/class/drm/{{ card }}/device/hwmon/hwmon*/power1_cap = {{ power_cap_half }}
+{% endif %}
+{% if 'custom' in item.0.name %}
+[sysfs]
+# configure GPU power/clock characteristics
+# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
+/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
+# give this custom oriented profile {{ (power_max_multi |float * 100.0) |round |int }}% of the power capability
+/sys/class/drm/{{ card }}/device/hwmon/hwmon*/power1_cap = {{ power_cap }}
+
+# reference/execute AMDGPU clock control script
+[gpuclockscript]
+type=script
+script=${i:PROFILE_DIR}/amdgpu-clock.sh
+{% endif %}