make card# assignment dynamic, clean up linting
This commit is contained in:
parent
e535ea3ebc
commit
f18704dbfe
4 changed files with 94 additions and 59 deletions
52
host_vars/localhost.yml
Normal file
52
host_vars/localhost.yml
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
---
|
||||||
|
# statically defined mapping of the contents in /sys/class/drm/card*/device/pp_power_profile_mode
|
||||||
|
# more may be added, but do not remove default/custom. new profiles require a script template, see 'templates'
|
||||||
|
amdgpu_profiles:
|
||||||
|
default:
|
||||||
|
pwrmode: 0
|
||||||
|
3D:
|
||||||
|
pwrmode: 1
|
||||||
|
VR:
|
||||||
|
pwrmode: 4
|
||||||
|
custom:
|
||||||
|
pwrmode: 6
|
||||||
|
|
||||||
|
# the multipliers against power capability to determine power limits for the non-OC (default)/OC (custom) profiles
|
||||||
|
# 0.5 = 50%, 1.0 = 100% (of card power capability, not stock limits)
|
||||||
|
#
|
||||||
|
# should not exceed 1.0, must be a float. driver will do some rounding/stepping
|
||||||
|
#
|
||||||
|
# see 'power_max multi tab calculator.ods' for a calculator/spreadsheet
|
||||||
|
# adjust cell F14 (board max power) and the 'effective watts' column to update calculations
|
||||||
|
# microWatt board power capability can be discovered like so: 'cat /sys/class/drm/card*/device/hwmon/hwmon*/power1_cap_max'
|
||||||
|
# power_max_default_multi: 0.75 # 242.25W, slightly lower than true default
|
||||||
|
# power for the default profile
|
||||||
|
power_max_default_multi: 0.789473684210526 # 255W - default
|
||||||
|
# power_max_default_multi: 0.820433436532508 # 265W
|
||||||
|
# power_max_default_multi: 0.851393188854489 # 275W
|
||||||
|
# power_max_default_multi: 0.869969040247678 # 281W
|
||||||
|
|
||||||
|
# power for the custom profile
|
||||||
|
# power_max_custom_multi: 0.789473684210526 # 255W - default
|
||||||
|
power_max_custom_multi: 0.869969040247678 # 281W
|
||||||
|
# alt default power limits
|
||||||
|
# power_max_default_multi: 0.696594427244582 # 225W
|
||||||
|
# power_max_default_multi: 0.869969040247678 # 281W
|
||||||
|
#
|
||||||
|
# minimum/maximum GPU clocks using 'powerplay' below
|
||||||
|
# these do *not* apply to the resulting 'amdgpu-default' tuned profile
|
||||||
|
#
|
||||||
|
# only the others (eg: 'VR'/'custom') under amdgpu_profiles (above)
|
||||||
|
# effective clocks are dynamically adjusted by the driver in this range - based on utilization
|
||||||
|
# can find baseline values in the '/sys/class/drm/{{ card }}/device/pp_od_clk_voltage' file
|
||||||
|
# OD_RANGE indicates acceptable value ranges for SCLK (core) and MCLK (memory)
|
||||||
|
# Under 'OD_SCLK' and 'OD_MCLK' you will see 0/1. These are the minimum/maximum values for the respective component.
|
||||||
|
gpu_clock_min: "500" # default 500
|
||||||
|
gpu_clock_max: "2500" # default 2529
|
||||||
|
#
|
||||||
|
# memory clocks are static, we only set a 'max' - high-refresh rate display quirk
|
||||||
|
# this allows the memory clock to be adjusted
|
||||||
|
gpumem_clock_max: "1050"
|
||||||
|
#
|
||||||
|
# optional, applies an offset to the GPU core voltage for the 'custom' GPU profile, eg: '+100' boosts it by 100mV (0.1V).
|
||||||
|
# gpu_mv_offset: "+50"
|
5
inventories/localhost.yml
Normal file
5
inventories/localhost.yml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
lab:
|
||||||
|
hosts:
|
||||||
|
localhost:
|
||||||
|
ansible_connection: local
|
69
playbook.yml
69
playbook.yml
|
@ -1,36 +1,7 @@
|
||||||
---
|
---
|
||||||
- hosts: localhost
|
- hosts: localhost
|
||||||
become: yes
|
become: true
|
||||||
vars:
|
vars:
|
||||||
# the multipliers against power capability to determine power limits for the non-OC (default)/OC (custom) profiles
|
|
||||||
# 0.5 = 50%
|
|
||||||
# 1.0 = 100% (of card power capability, not stock limits)
|
|
||||||
# should not exceed 1.0, must be a float. driver will do some rounding/stepping
|
|
||||||
# default is ~87% (281/323) on my 6900XT. check '/sys/class/drm/card*/device/hwmon/hwmon*/power1_cap_default'
|
|
||||||
power_max_custom_multi: 0.928793 # calculated to ~300W. used to control the effective power limit in the non-default AMDGPU tuned profiles
|
|
||||||
power_max_default_multi: 0.82 # (typically) used to limit GPU power to some lower percentage on default perf mode/profile
|
|
||||||
#
|
|
||||||
# minimum/maximum GPU clocks using 'powerplay'
|
|
||||||
# these do not apply to the resulting 'amdgpu-default' tuned profile
|
|
||||||
# only the others (eg: 'VR'/'custom') under amdgpu_profiles (below)
|
|
||||||
# effective clocks are dynamically adjusted by the driver in this range - based on utilization
|
|
||||||
# can find baseline values in the '/sys/class/drm/{{ card }}/device/pp_od_clk_voltage' file
|
|
||||||
# OD_RANGE indicates acceptable value ranges for SCLK (core) and MCLK (memory)
|
|
||||||
# Under 'OD_SCLK' and 'OD_MCLK' you will see 0/1. These are the minimum/maximum values for the respective component.
|
|
||||||
gpu_clock_min: "2000"
|
|
||||||
gpu_clock_max: "2615"
|
|
||||||
#
|
|
||||||
# memory clocks are static, we only set a 'max' - high-refresh rate display quirk
|
|
||||||
# this allows the memory clock to be adjusted
|
|
||||||
gpumem_clock_max: "1075"
|
|
||||||
#
|
|
||||||
# optional, applies offset to GPU voltage, eg: '+100' = to boost GPU core voltage 100mV or 0.1V. for the 'custom' GPU profile.
|
|
||||||
gpu_mv_offset: "-25"
|
|
||||||
#
|
|
||||||
# the card for 'tuned' to control
|
|
||||||
# this is *usually* 'card0', but may differ.
|
|
||||||
# check '/dev/dri/card*'
|
|
||||||
card: 'card0' # default to card0
|
|
||||||
# list of source tuned profiles available on Fedora (TODO: should dynamically discover)
|
# list of source tuned profiles available on Fedora (TODO: should dynamically discover)
|
||||||
# further modified with AMD GPU power/clock parameters, creating new profiles. eg: 'balanced-amdgpu-VR'
|
# further modified with AMD GPU power/clock parameters, creating new profiles. eg: 'balanced-amdgpu-VR'
|
||||||
base_profiles:
|
base_profiles:
|
||||||
|
@ -41,13 +12,6 @@
|
||||||
- network-throughput
|
- network-throughput
|
||||||
- powersave
|
- powersave
|
||||||
- virtual-host
|
- virtual-host
|
||||||
amdgpu_profiles: # statically defined mapping of the contents in /sys/class/drm/card*/device/pp_power_profile_mode
|
|
||||||
default: # more may be added, but do not remove default/custom. new profiles require a script template, see 'templates'
|
|
||||||
pwrmode: 0
|
|
||||||
VR:
|
|
||||||
pwrmode: 4
|
|
||||||
custom:
|
|
||||||
pwrmode: 6
|
|
||||||
handlers:
|
handlers:
|
||||||
- name: restart tuned
|
- name: restart tuned
|
||||||
ansible.builtin.service:
|
ansible.builtin.service:
|
||||||
|
@ -64,7 +28,10 @@
|
||||||
with_items:
|
with_items:
|
||||||
- {name: 'power-profiles-daemon', state: 'absent'}
|
- {name: 'power-profiles-daemon', state: 'absent'}
|
||||||
- {name: 'tuned', state: 'present'}
|
- {name: 'tuned', state: 'present'}
|
||||||
when: ('power-profiles-daemon' in ansible_facts.packages) or (('tuned' not in ansible_facts.packages) and ((ansible_distribution == 'Fedora') and (ansible_distribution_major_version|int > 35)))
|
when:
|
||||||
|
- ('power-profiles-daemon' in ansible_facts.packages) or ('tuned' not in ansible_facts.packages)
|
||||||
|
- ansible_distribution == 'Fedora'
|
||||||
|
- ansible_distribution_major_version|int > 35
|
||||||
register: fed_ppdtuned_swap
|
register: fed_ppdtuned_swap
|
||||||
# 'power-profiles-daemon' was added/conflicts with 'tuned' since F35
|
# 'power-profiles-daemon' was added/conflicts with 'tuned' since F35
|
||||||
# otherwise, ensure the 'tuned' package is installed
|
# otherwise, ensure the 'tuned' package is installed
|
||||||
|
@ -73,25 +40,31 @@
|
||||||
name: tuned
|
name: tuned
|
||||||
state: present
|
state: present
|
||||||
when: (fed_ppdtuned_swap is not defined) or ('tuned' not in ansible_facts.packages)
|
when: (fed_ppdtuned_swap is not defined) or ('tuned' not in ansible_facts.packages)
|
||||||
- name: find hwmon/max power capability file for {{ card }}
|
- name: determine GPU device in drm subsystem
|
||||||
|
shell:
|
||||||
|
# list DRM connectors reporting 'connected', reduce them to their cardN prefix, keep the last one
# the pattern is quoted so bash cannot glob-expand it against files in the working directory
cmd: grep -ls ^connected /sys/class/drm/*/status | grep -o 'card[0-9]' | sort | uniq | tail -1
|
||||||
|
executable: /bin/bash
|
||||||
|
changed_when: false
|
||||||
|
register: card
|
||||||
|
- name: find hwmon/max power capability file for {{ card.stdout }}
|
||||||
find:
|
find:
|
||||||
paths: /sys/class/drm/{{ card }}/device/hwmon
|
paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
|
||||||
file_type: file
|
file_type: file
|
||||||
recurse: yes
|
recurse: true
|
||||||
use_regex: yes
|
use_regex: true
|
||||||
patterns:
|
patterns:
|
||||||
- '^power1_cap_max$'
|
- '^power1_cap_max$'
|
||||||
register: hwmon
|
register: hwmon
|
||||||
- name: find hwmon/current power limit file for {{ card }}
|
- name: find hwmon/current power limit file for {{ card.stdout }}
|
||||||
find:
|
find:
|
||||||
paths: /sys/class/drm/{{ card }}/device/hwmon
|
paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
|
||||||
file_type: file
|
file_type: file
|
||||||
recurse: yes
|
recurse: true
|
||||||
use_regex: yes
|
use_regex: true
|
||||||
patterns:
|
patterns:
|
||||||
- '^power1_cap$'
|
- '^power1_cap$'
|
||||||
register: powercap_set
|
register: powercap_set
|
||||||
- name: get max power capability for {{ card }}
|
- name: get max power capability for {{ card.stdout }}
|
||||||
slurp:
|
slurp:
|
||||||
src: "{{ hwmon.files.0.path }}"
|
src: "{{ hwmon.files.0.path }}"
|
||||||
register: power_max_b64
|
register: power_max_b64
|
||||||
|
@ -128,4 +101,4 @@
|
||||||
- name: ensure tuned is enabled
|
- name: ensure tuned is enabled
|
||||||
service:
|
service:
|
||||||
name: tuned
|
name: tuned
|
||||||
enabled: yes
|
enabled: true
|
||||||
|
|
|
@ -13,18 +13,23 @@
|
||||||
# AMDGPU driver/sysfs references:
|
# AMDGPU driver/sysfs references:
|
||||||
# https://01.org/linuxgraphics/gfx-docs/drm/gpu/amdgpu.html
|
# https://01.org/linuxgraphics/gfx-docs/drm/gpu/amdgpu.html
|
||||||
# https://docs.kernel.org/gpu/amdgpu/thermal.html
|
# https://docs.kernel.org/gpu/amdgpu/thermal.html
|
||||||
|
|
||||||
|
{# done this way to avoid issues with the card number possibly shifting after playbook run #}
|
||||||
|
# dynamically determine the connected GPU using the DRM subsystem
|
||||||
|
CARD=$(/usr/bin/grep -ls ^connected /sys/class/drm/*/status | /usr/bin/grep -o 'card[0-9]' | /usr/bin/sort | /usr/bin/uniq | /usr/bin/tail -1)
|
||||||
|
|
||||||
{# begin the templated script for 'default' profiles to reset state #}
|
{# begin the templated script for 'default' profiles to reset state #}
|
||||||
{% if 'default' in item.0.key %}
|
{% if 'default' in item.0.key %}
|
||||||
|
|
||||||
# set power state transition heuristics to default
|
# set power state transition heuristics to default
|
||||||
echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/{{ card }}/device/pp_power_profile_mode
|
echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode
|
||||||
|
|
||||||
# set control mode back to auto
|
# set control mode back to auto
|
||||||
# attempts to dynamically set optimal power profile for (load) conditions
|
# attempts to dynamically set optimal power profile for (load) conditions
|
||||||
echo 'auto' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
|
echo 'auto' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level
|
||||||
|
|
||||||
# reset any existing profile clock changes
|
# reset any existing profile clock changes
|
||||||
echo 'r' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 'r' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
|
||||||
|
|
||||||
# give default profile {{ (power_max_default_multi * 100.0) | round | int }}% of the max power capability
|
# give default profile {{ (power_max_default_multi * 100.0) | round | int }}% of the max power capability
|
||||||
# {{ power_cap_default|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
# {{ power_cap_default|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
||||||
|
@ -36,34 +41,34 @@ echo '{{ power_cap_default }}' | tee {{ powercap_set.files.0.path }}
|
||||||
# allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files
|
# allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files
|
||||||
# only interested in 'pp_power_profile_mode' for power and 'pp_dpm_mclk' for memory clock (flickering).
|
# only interested in 'pp_power_profile_mode' for power and 'pp_dpm_mclk' for memory clock (flickering).
|
||||||
# GPU clocks are dynamic based on (load) condition
|
# GPU clocks are dynamic based on (load) condition
|
||||||
echo 'manual' | tee /sys/class/drm/{{ card }}/device/power_dpm_force_performance_level
|
echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level
|
||||||
|
|
||||||
# set power state transition heuristics to '{{ item.0.key }}' profile
|
# set power state transition heuristics to '{{ item.0.key }}' profile
|
||||||
echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/{{ card }}/device/pp_power_profile_mode
|
echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode
|
||||||
|
|
||||||
# give this profile {{ (power_max_custom_multi * 100.0) | round | int }}% of the max power capability
|
# give this profile {{ (power_max_custom_multi * 100.0) | round | int }}% of the max power capability
|
||||||
# {{ power_cap_custom|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
# {{ power_cap_custom|int/1000000 }} Watts of {{ power_max|int/1000000 }} total
|
||||||
echo '{{ power_cap_custom }}' | tee {{ powercap_set.files.0.path }}
|
echo '{{ power_cap_custom }}' | tee {{ powercap_set.files.0.path }}
|
||||||
|
|
||||||
# set the minimum GPU clock
|
# set the minimum GPU clock
|
||||||
echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
|
||||||
|
|
||||||
# set the maximum GPU clock
|
# set the maximum GPU clock
|
||||||
echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
|
||||||
|
|
||||||
# set the maximum GPU *memory* clock
|
# set the maximum GPU *memory* clock
|
||||||
echo 'm 1 {{ gpumem_clock_max }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 'm 1 {{ gpumem_clock_max }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
|
||||||
{% if gpu_mv_offset is defined %}
|
{% if gpu_mv_offset is defined %}
|
||||||
|
|
||||||
# offset GPU voltage {{ gpu_mv_offset }}mV
|
# offset GPU voltage {{ gpu_mv_offset }}mV
|
||||||
echo 'vo {{ gpu_mv_offset }}' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 'vo {{ gpu_mv_offset }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
# commit the changes
|
# commit the changes
|
||||||
echo 'c' | tee /sys/class/drm/{{ card }}/device/pp_od_clk_voltage
|
echo 'c' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
|
||||||
|
|
||||||
# force GPU memory into highest clock (fix flickering)
|
# force GPU memory into highest clock (fix flickering)
|
||||||
# pp_dpm_*clk settings are unintuitive, giving profiles that may be used
|
# pp_dpm_*clk settings are unintuitive, giving profiles that may be used
|
||||||
# opt not to set the others (eg: sclk/fclk) - those should remain for benefits from the curve
|
# opt not to set the others (eg: sclk/fclk) - those should remain for benefits from the curve
|
||||||
echo '3' | tee /sys/class/drm/{{ card }}/device/pp_dpm_mclk
|
echo '3' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_mclk
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
Loading…
Reference in a new issue