use (one source template) script for *all* GPU control
This commit is contained in:
parent
562843c069
commit
f780ca20ee
4 changed files with 50 additions and 60 deletions
26
playbook.yml
26
playbook.yml
|
@ -60,6 +60,15 @@
|
||||||
patterns:
|
patterns:
|
||||||
- '^power1_cap_max$'
|
- '^power1_cap_max$'
|
||||||
register: hwmon
|
register: hwmon
|
||||||
|
- name: find hwmon/current power limit file for {{ card }}
|
||||||
|
find:
|
||||||
|
paths: /sys/class/drm/{{ card }}/device/hwmon
|
||||||
|
file_type: file
|
||||||
|
recurse: yes
|
||||||
|
use_regex: yes
|
||||||
|
patterns:
|
||||||
|
- '^power1_cap$'
|
||||||
|
register: powercap_set
|
||||||
- name: get max power capability for {{ card }}
|
- name: get max power capability for {{ card }}
|
||||||
slurp:
|
slurp:
|
||||||
src: "{{ hwmon.files.0.path }}"
|
src: "{{ hwmon.files.0.path }}"
|
||||||
|
@ -72,26 +81,15 @@
|
||||||
with_nested:
|
with_nested:
|
||||||
- "{{ amdgpu_profiles }}"
|
- "{{ amdgpu_profiles }}"
|
||||||
- "{{ base_profiles }}"
|
- "{{ base_profiles }}"
|
||||||
- name: template AMDGPU clock control scripts (tuned profile dependency)
|
- name: template AMDGPU clock control scripts (default/GPU profile dependency)
|
||||||
template:
|
template:
|
||||||
src: templates/amdgpu-clock.sh.j2
|
src: templates/amdgpu-clock.sh.j2
|
||||||
dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/amdgpu-clock.sh
|
dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.name }}/amdgpu-clock.sh
|
||||||
owner: root
|
owner: root
|
||||||
group: root
|
group: root
|
||||||
mode: "0755"
|
mode: "0755"
|
||||||
with_nested:
|
with_nested:
|
||||||
- 'custom'
|
- "{{ amdgpu_profiles }}"
|
||||||
- "{{ base_profiles }}"
|
|
||||||
notify: restart tuned
|
|
||||||
- name: template AMDGPU clock control *reset* script (tuned profile dependency)
|
|
||||||
template:
|
|
||||||
src: templates/amdgpu-clock-reset.sh.j2
|
|
||||||
dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0 }}/amdgpu-clock-reset.sh
|
|
||||||
owner: root
|
|
||||||
group: root
|
|
||||||
mode: "0755"
|
|
||||||
with_nested:
|
|
||||||
- 'default'
|
|
||||||
- "{{ base_profiles }}"
|
- "{{ base_profiles }}"
|
||||||
notify: restart tuned
|
notify: restart tuned
|
||||||
- name: template custom tuned profiles
|
- name: template custom tuned profiles
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
# script to reset tuned's AMDGPU clock control to default
|
|
||||||
#
|
|
||||||
# rendered by Ansible with environment-appropriate values:
|
|
||||||
# card #, eg: card0
|
|
||||||
# min/max GPU clocks
|
|
||||||
|
|
||||||
# set control mode back to auto
|
|
||||||
echo 'auto' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
|
|
||||||
|
|
||||||
# reset any existing profile clock changes
|
|
||||||
echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
|
|
@ -1,19 +1,46 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# script for tuned AMDGPU clock control
|
# script for tuned AMDGPU clock control
|
||||||
# clocks in 3D usage are dynamic based on need/usage
|
# configures GPU power/clock characteristics
|
||||||
#
|
# clocks/power in 3D are dynamic based on need/usage
|
||||||
# this sets the minimums / maximums
|
|
||||||
#
|
#
|
||||||
# rendered by Ansible with environment-appropriate values:
|
# rendered by Ansible with environment-appropriate values:
|
||||||
# card #, eg: card0
|
# card #, eg: card0
|
||||||
# min/max GPU clocks
|
# path to discovered sysfs device files (power/clock/voltage control)
|
||||||
|
#
|
||||||
|
# this sets the minimums / maximums for a specific generation of GPU
|
||||||
|
# settings may need to be adjusted
|
||||||
|
#
|
||||||
|
# AMDGPU driver/sysfs references:
|
||||||
|
# https://01.org/linuxgraphics/gfx-docs/drm/gpu/amdgpu.html
|
||||||
|
# https://docs.kernel.org/gpu/amdgpu/thermal.html
|
||||||
|
|
||||||
# send a reset for a clean slate
|
{% if 'default' in item.0.name %}
|
||||||
# echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
# set power state transition heuristics to default
|
||||||
|
echo '{{ item.0.value }}' | tee /sys/class/drm/{{ card }}/device/pp_power_profile_mode
|
||||||
|
|
||||||
|
# set control mode back to auto
|
||||||
|
# attempts to dynamically set optimal power profile for conditions
|
||||||
|
echo 'auto' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
|
||||||
|
|
||||||
|
# reset any existing profile clock changes
|
||||||
|
echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
||||||
|
|
||||||
|
# give default profile {{ power_max_default_multi * 100.0 |int }}% of the max power capability
|
||||||
|
# {{ power_cap_default|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
||||||
|
echo '{{ power_cap_default }}' | tee {{ powercap_set.files.0.path }}
|
||||||
|
{% elif 'custom' in item.0.name %}
|
||||||
# set manual control mode
|
# set manual control mode
|
||||||
|
# allow control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', and 'pp_power_profile_mode' files
|
||||||
echo 'manual' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
|
echo 'manual' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
|
||||||
|
|
||||||
|
# set power state transition heuristics to custom/manual
|
||||||
|
# looked up from amdgpu_profiles variable using 'with_nested' loop in task
|
||||||
|
echo '{{ item.0.value }}' | tee /sys/class/drm/{{ card }}/device/pp_power_profile_mode
|
||||||
|
|
||||||
|
# give this profile {{ power_max_custom_multi * 100.0 |int }}% of the max power capability
|
||||||
|
# {{ power_cap_custom|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
||||||
|
echo '{{ power_cap_custom }}' | tee {{ powercap_set.files.0.path }}
|
||||||
|
|
||||||
# set the minimum GPU clock
|
# set the minimum GPU clock
|
||||||
echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
||||||
|
|
||||||
|
@ -22,8 +49,8 @@ echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_
|
||||||
|
|
||||||
# set the maximum GPU *memory* clock
|
# set the maximum GPU *memory* clock
|
||||||
echo 'm 1 {{ gpumem_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 'm 1 {{ gpumem_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
||||||
|
|
||||||
{% if gpu_mv_offset is defined %}
|
{% if gpu_mv_offset is defined %}
|
||||||
|
|
||||||
# offset GPU voltage {{ gpu_mv_offset }}mV
|
# offset GPU voltage {{ gpu_mv_offset }}mV
|
||||||
echo 'vo {{ gpu_mv_offset }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 'vo {{ gpu_mv_offset }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
@ -33,3 +60,4 @@ echo 'c' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
||||||
|
|
||||||
# force GPU memory into highest profile
|
# force GPU memory into highest profile
|
||||||
echo '3' | tee /sys/class/drm/{{ card }}/device/pp_dpm_mclk
|
echo '3' | tee /sys/class/drm/{{ card }}/device/pp_dpm_mclk
|
||||||
|
{% endif %}
|
||||||
|
|
|
@ -14,33 +14,9 @@ dev.raid.speed_limit_max=9000000
|
||||||
# allow some games to run (eg: DayZ)
|
# allow some games to run (eg: DayZ)
|
||||||
vm.max_map_count=1048576
|
vm.max_map_count=1048576
|
||||||
|
|
||||||
{% if 'default' in item.0.name %}
|
# reference/execute AMDGPU control script
|
||||||
# reference/execute AMDGPU clock control *reset* script
|
# used because some sysfs interfaces are transactional
|
||||||
[gpuresetscript]
|
# cannot be set by a single param in 'tuned'/[sysfs]
|
||||||
type=script
|
|
||||||
script=${i:PROFILE_DIR}/amdgpu-clock-reset.sh
|
|
||||||
|
|
||||||
[sysfs]
|
|
||||||
# configure GPU power/clock characteristics
|
|
||||||
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
|
|
||||||
/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
|
|
||||||
/sys/class/drm/{{ card }}/device/power_dpm_force_performance_level = auto
|
|
||||||
{# # give default profile {{ power_max_default_multi * 100.0 |int }}% of the max power capability -- {{ power_max_float * power_max_default_multi / 1000000.0 |int }} Watts of {{ power_max / 1000000 |int }} total #}
|
|
||||||
# give default profile {{ power_max_default_multi * 100.0 |int }}% of the max power capability
|
|
||||||
# {{ power_cap_default|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
|
||||||
/sys/class/drm/{{ card }}/device/hwmon/hwmon9/power1_cap = {{ power_cap_default }}
|
|
||||||
{% endif %}
|
|
||||||
{% if 'custom' in item.0.name %}
|
|
||||||
[sysfs]
|
|
||||||
# configure GPU power/clock characteristics
|
|
||||||
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
|
|
||||||
/sys/class/drm/{{ card }}/device/pp_power_profile_mode = {{ item.0.value }}
|
|
||||||
# give this custom oriented profile {{ power_max_custom_multi * 100.0 |int }}% of the max power capability
|
|
||||||
# {{ power_cap_custom|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
|
||||||
/sys/class/drm/{{ card }}/device/hwmon/hwmon9/power1_cap = {{ power_cap_custom }}
|
|
||||||
|
|
||||||
# reference/execute AMDGPU clock control script
|
|
||||||
[gpuclockscript]
|
[gpuclockscript]
|
||||||
type=script
|
type=script
|
||||||
script=${i:PROFILE_DIR}/amdgpu-clock.sh
|
script=${i:PROFILE_DIR}/amdgpu-clock.sh
|
||||||
{% endif %}
|
|
||||||
|
|
Loading…
Reference in a new issue