# tuned-amdgpu/playbook.yml
#
# file-viewer metadata at capture time: 131 lines, 5.3 KiB, YAML

---
# Play: installs 'tuned' and generates AMDGPU-aware tuned profiles on the local
# machine. For every combination of a base tuned profile and an AMDGPU profile
# it creates /etc/tuned/<base>-amdgpu-<profile>/ holding a templated
# 'amdgpu-clock.sh' script and a templated 'tuned.conf'.
- hosts: localhost
  become: true
  vars:
    # Multipliers applied to the card's power capability to determine the power
    # limits for the non-OC (default) and OC (custom) profiles.
    #   0.5 = 50%
    #   1.0 = 100% (of card power capability, not stock limits)
    # Should not exceed 1.0 and must be a float. The driver will do some
    # rounding/stepping.
    # Default is ~87% (281/323) on my 6900XT. Check
    # '/sys/class/drm/card*/device/hwmon/hwmon*/power1_cap_default'.
    #
    # Calculated to ~300W. Used to control the effective power limit in the
    # non-default AMDGPU tuned profiles.
    power_max_custom_multi: 0.928793
    # (Typically) used to limit GPU power to some lower percentage on the
    # default perf mode/profile.
    power_max_default_multi: 0.82

    # Minimum/maximum GPU clocks using 'powerplay'.
    # These do not apply to the resulting 'amdgpu-default' tuned profile, only
    # to the others (eg: 'VR'/'custom') under amdgpu_profiles (below).
    # Effective clocks are dynamically adjusted by the driver in this range,
    # based on utilization.
    # Baseline values can be found in the
    # '/sys/class/drm/{{ card }}/device/pp_od_clk_voltage' file:
    #   - OD_RANGE indicates acceptable value ranges for SCLK (core) and
    #     MCLK (memory).
    #   - Under 'OD_SCLK' and 'OD_MCLK' you will see 0/1. These are the
    #     minimum/maximum values for the respective component.
    gpu_clock_min: "2000"
    gpu_clock_max: "2615"

    # Memory clocks are static, we only set a 'max' - high-refresh rate display
    # quirk. This allows the memory clock to be adjusted.
    gpumem_clock_max: "1075"

    # Optional, applies an offset to GPU voltage for the 'custom' GPU profile.
    # eg: '+100' boosts GPU core voltage by 100mV (0.1V).
    gpu_mv_offset: "-25"

    # The card for 'tuned' to control.
    # This is *usually* 'card0', but may differ. Check '/dev/dri/card*'.
    card: 'card0'

    # List of source tuned profiles available on Fedora
    # (TODO: should dynamically discover).
    # Further modified with AMD GPU power/clock parameters, creating new
    # profiles. eg: 'balanced-amdgpu-VR'
    base_profiles:
      - balanced
      - desktop
      - latency-performance
      - network-latency
      - network-throughput
      - powersave
      - virtual-host

    # Statically defined mapping of the contents in
    # /sys/class/drm/card*/device/pp_power_profile_mode.
    # More may be added, but do not remove default/custom. New profiles require
    # a script template, see 'templates'.
    amdgpu_profiles:
      default:
        pwrmode: 0
      VR:
        pwrmode: 4
      custom:
        pwrmode: 6

  handlers:
    # Notified by the templating tasks so tuned picks up changed profiles.
    - name: restart tuned
      ansible.builtin.service:
        name: tuned
        state: restarted

  tasks:
    # Populates ansible_facts.packages, which the swap condition below reads.
    - name: Gather package facts
      ansible.builtin.package_facts:
        manager: auto

    # 'power-profiles-daemon' was added/conflicts with 'tuned' since F35.
    # Loop over a remove + install pair because the 'dnf' module has no 'swap'
    # operation.
    - name: replace 'power-profiles-daemon' with 'tuned' on Fedora 35+
      ansible.builtin.dnf:
        name: "{{ item.name }}"
        state: "{{ item.state }}"
      loop:
        - name: 'power-profiles-daemon'
          state: 'absent'
        - name: 'tuned'
          state: 'present'
      # '>= 35' so that Fedora 35 itself is covered, matching the task name.
      when: >-
        ('power-profiles-daemon' in ansible_facts.packages)
        or (
        ('tuned' not in ansible_facts.packages)
        and (ansible_distribution == 'Fedora')
        and (ansible_distribution_major_version | int >= 35)
        )
      register: fed_ppdtuned_swap

    # Otherwise, ensure the 'tuned' package is installed.
    # Runs unconditionally: a registered variable is defined even when its task
    # was skipped, and the package facts above were gathered before the swap,
    # so neither is a reliable guard. 'state: present' is idempotent anyway.
    - name: install tuned
      ansible.builtin.package:
        name: tuned
        state: present

    # Locates the 'power1_cap_max' file below the card's hwmon directory.
    - name: find hwmon/max power capability file for {{ card }}
      ansible.builtin.find:
        paths: "/sys/class/drm/{{ card }}/device/hwmon"
        file_type: file
        recurse: true
        use_regex: true
        patterns:
          - '^power1_cap_max$'
      register: hwmon

    # Locates the 'power1_cap' file below the card's hwmon directory.
    # NOTE(review): 'powercap_set' is not referenced in this playbook;
    # presumably the templates consume it - confirm.
    - name: find hwmon/current power limit file for {{ card }}
      ansible.builtin.find:
        paths: "/sys/class/drm/{{ card }}/device/hwmon"
        file_type: file
        recurse: true
        use_regex: true
        patterns:
          - '^power1_cap$'
      register: powercap_set

    # Fail with a readable message instead of an index error in the next task
    # when the search above matched nothing (eg: wrong 'card' value).
    - name: verify the max power capability file was found
      ansible.builtin.assert:
        that:
          - hwmon.matched | int > 0
        fail_msg: "no 'power1_cap_max' file found under /sys/class/drm/{{ card }}/device/hwmon, check the 'card' variable"

    # Reads the first matched file; slurp returns its content base64-encoded.
    - name: get max power capability for {{ card }}
      ansible.builtin.slurp:
        src: "{{ hwmon.files.0.path }}"
      register: power_max_b64

    # The three tasks below iterate over every (AMDGPU profile, base profile)
    # pair: item.0 is a {key, value} entry of amdgpu_profiles, item.1 is a
    # base profile name.
    - name: create custom profile directories
      ansible.builtin.file:
        state: directory
        path: "/etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}"
        mode: "0755"
      loop: "{{ amdgpu_profiles | dict2items | product(base_profiles) | list }}"

    - name: template AMDGPU control/reset scripts
      ansible.builtin.template:
        src: templates/amdgpu-clock.sh.j2
        dest: "/etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/amdgpu-clock.sh"
        owner: root
        group: root
        mode: "0755"
      loop: "{{ amdgpu_profiles | dict2items | product(base_profiles) | list }}"
      notify: restart tuned

    - name: template custom tuned profiles
      ansible.builtin.template:
        src: templates/tuned.conf.j2
        dest: "/etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/tuned.conf"
        owner: root
        group: root
        mode: "0644"
      loop: "{{ amdgpu_profiles | dict2items | product(base_profiles) | list }}"
      notify: restart tuned

    # Only enables the service at boot; (re)starting is left to the handler.
    - name: ensure tuned is enabled
      ansible.builtin.service:
        name: tuned
        enabled: true