This commit is contained in:
Josh Lay 2022-11-27 00:51:39 -06:00
parent 23f0ad541d
commit b525797f8f
Signed by: jlay
GPG key ID: B265E45CACAD108A
7 changed files with 155 additions and 155 deletions

View file

@ -1,49 +0,0 @@
---
# statically defined mapping of gpu profiles
# produces adjusted tuned profiles based on the contents of /sys/class/drm/card*/device/pp_power_profile_mode
# adds a power multiplier, see comments below for more info
# more reference driver profiles may be added, but do not remove default.
# Static map: profile name -> power mode number (pwrmode) and power-cap
# multiplier (pwr_cap_multi). Keep the 'default' entry; others may be added.
amdgpu_profiles:
  default:
    pwrmode: 0
    pwr_cap_multi: 0.789473684210526  # 255W - default
  "3D":
    pwrmode: 1
    pwr_cap_multi: 0.789473684210526  # 255W - default
  VR:
    pwrmode: 4
    pwr_cap_multi: 0.789473684210526  # 255W - default
  compute:
    pwrmode: 5
    pwr_cap_multi: 0.789473684210526  # 255W - default
  custom:
    pwrmode: 6
    pwr_cap_multi: 0.869969040247678  # 281W - slight boost
# pwr_cap_multi is multiplier against *board power capability* to determine power limits on the associated profile
# 0.5 = 50%, 1.0 = 100% (of card power capability, not stock limits)
#
# should not exceed 1.0, must be a float. driver will do some rounding/stepping
#
# see 'power_max multi tab calculator.ods' for a calculator/spreadsheet
# adjust cell F14 (board max power) and the 'effective watts' column to update calculations
# microWatt board power capability can be discovered like so: 'cat /sys/class/drm/card*/device/hwmon/hwmon*/power1_cap_max'
# power_max_default_multi: 0.75 # 242.25W, slightly lower than true default
#
# minimum/maximum GPU clocks using 'powerplay' below
# these do *not* apply to the resulting 'amdgpu-default' tuned profile
#
# only the others (eg: 'VR'/'custom') under amdgpu_profiles (above)
# effective clocks are dynamically adjusted by the driver in this range - based on utilization
# can find baseline values in the '/sys/class/drm/{{ card }}/device/pp_od_clk_voltage' file
# OD_RANGE indicates acceptable value ranges for SCLK (core) and MCLK (memory)
# Under 'OD_SCLK' and 'OD_MCLK' you will see 0/1. These are the minimum/maximum values for the respective component.
# GPU core clock bounds; the driver picks the effective clock inside this range.
gpu_clock_min: '500'  # default 500
gpu_clock_max: '2500'  # default 2529
#
# Memory clocks are static, so only a 'max' is set (high-refresh rate display quirk);
# setting it allows the memory clock to be adjusted.
gpumem_clock_max: '1050'
#
# optional: applies an offset to the GPU core voltage for the 'custom' GPU profile, eg: '+100' boosts it by 100mV (0.1V).
# gpu_mv_offset: "+50"

View file

@ -1,10 +1,18 @@
--- ---
- hosts: localhost - hosts: localhost
name: "Configure 'tuned' with AMD GPU control" name: "Configure 'tuned' with AMD GPU control"
become: true roles:
vars: # role prepares/modifies 'tuned' with AMD GPU power/clock parameters
# creates a new tuned profile made for each permutation of (base) 'tuned' profile + AMD powerplay profile
- role: tuned_amdgpu
# GPU *core* min/max (dynamic) clock, only applies to non-default amdgpu profiles
gpu_clock_min: "750" # default 500
gpu_clock_max: "2600" # default 2529
# GPU memory *static* clock, should also only apply for non-default amdgpu profiles
gpumem_clock_max: "1050"
# optional, applies offset to GPU voltage, eg: '+100' = to boost GPU core voltage 100mV or 0.1V. for the 'custom' GPU profile.
# gpu_mv_offset: "+50"
# list of source tuned profiles available on Fedora (TODO: should dynamically discover) # list of source tuned profiles available on Fedora (TODO: should dynamically discover)
# further modified with AMD GPU power/clock parameters, creating new profiles. eg: 'balanced-amdgpu-VR'
base_profiles: base_profiles:
- balanced - balanced
- desktop - desktop
@ -13,93 +21,24 @@
- network-throughput - network-throughput
- powersave - powersave
- virtual-host - virtual-host
handlers: # mapping of typical Navi generation power profiles from:
- name: Restart tuned # /sys/class/drm/card*/device/pp_power_profile_mode
ansible.builtin.service: # ref: https://www.kernel.org/doc/html/v4.20/gpu/amdgpu.html#pp-power-profile-mode
name: tuned amdgpu_profiles:
state: restarted default:
tasks: pwrmode: 0
- name: Gather package facts pwr_cap_multi: 0.789473684210526 # 255W - default
ansible.builtin.package_facts: 3D:
manager: auto pwrmode: 1
- name: Replace 'power-profiles-daemon' with 'tuned' on Fedora 35+ pwr_cap_multi: 0.789473684210526 # 255W - default
ansible.builtin.package: # use with_items/pkg since 'dnf' module in Ansible doesn't support 'swap' VR:
name: "{{ item.name }}" pwrmode: 4
state: "{{ item.state }}" pwr_cap_multi: 0.789473684210526 # 255W - default
with_items: compute:
- {name: 'power-profiles-daemon', state: 'absent'} pwrmode: 5
- {name: 'tuned', state: 'present'} pwr_cap_multi: 0.789473684210526 # 255W - default
when: custom:
- ('power-profiles-daemon' in ansible_facts.packages) or ('tuned' not in ansible_facts.packages) pwrmode: 6
- ansible_distribution == 'Fedora' pwr_cap_multi: 0.869969040247678 # 281W - slight boost
- ansible_distribution_major_version|int > 35 # both dictionaries are merged to create new 'tuned' profiles. eg:
register: fed_ppdtuned_swap # 'balanced-amdgpu-default', 'balanced-amdgpu-3D', 'balanced-amdgpu-video'
# 'power-profiles-daemon' was added/conflicts with 'tuned' since F35
# otherwise, ensure the 'tuned' package is installed
- name: Install tuned
ansible.builtin.package:
name: tuned
state: present
when: (fed_ppdtuned_swap is not defined) or ('tuned' not in ansible_facts.packages)
- name: Determine GPU device in drm subsystem
ansible.builtin.shell:
cmd: grep -ls ^connected /sys/class/drm/*/status | grep -o card[0-9] | sort | uniq | sort -h | tail -1
executable: /bin/bash
changed_when: false
register: card
- name: Find hwmon/max power capability file for {{ card.stdout }}
ansible.builtin.find:
paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
file_type: file
recurse: true
use_regex: true
patterns:
- '^power1_cap_max$'
register: hwmon
- name: Find hwmon/current power limit file for {{ card.stdout }}
ansible.builtin.find:
paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
file_type: file
recurse: true
use_regex: true
patterns:
- '^power1_cap$'
register: powercap_set
- name: Get max power capability for {{ card.stdout }}
ansible.builtin.slurp:
src: "{{ hwmon.files.0.path }}"
register: power_max_b64
- name: Create custom profile directories
ansible.builtin.file:
state: directory
path: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}
mode: "0755"
with_nested:
- "{{ lookup('dict', amdgpu_profiles) }}"
- "{{ base_profiles }}"
- name: Template AMDGPU control/reset scripts
ansible.builtin.template:
src: templates/amdgpu-clock.sh.j2
dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/amdgpu-clock.sh
owner: root
group: root
mode: "0755"
with_nested:
- "{{ lookup('dict', amdgpu_profiles) }}"
- "{{ base_profiles }}"
notify: restart tuned
- name: Template custom tuned profiles
ansible.builtin.template:
src: templates/tuned.conf.j2
dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/tuned.conf
owner: root
group: root
mode: "0644"
with_nested:
- "{{ lookup('dict', amdgpu_profiles) }}"
- "{{ base_profiles }}"
notify: restart tuned
- name: Ensure tuned is enabled
ansible.builtin.service:
name: tuned
enabled: true

View file

@ -1,9 +1,10 @@
--- ---
# misc default vars handling unit conversion RE: power capabilities/limits # defaults file for tuned_amdgpu
# #
# the discovered board limit for power capability (in microWatts) # vars handling unit conversion RE: power capabilities/limits
# the discovered board limit for power capability; in microWatts, then converted
power_max: "{{ power_max_b64['content'] | b64decode }}" power_max: "{{ power_max_b64['content'] | b64decode }}"
board_watts: "{{ power_max|int/1000000 }}" board_watts: "{{ power_max | int / 1000000 }}"
# internals for profile power calculations # internals for profile power calculations
# item in the context of the with_nested loops in the play # item in the context of the with_nested loops in the play
@ -11,4 +12,4 @@ profile_name: "{{ item.0.key }}"
profile_percentage: "{{ (item.0.value.pwr_cap_multi * 100.0) | round(2) }}" profile_percentage: "{{ (item.0.value.pwr_cap_multi * 100.0) | round(2) }}"
profile_multi: "{{ item.0.value.pwr_cap_multi }}" profile_multi: "{{ item.0.value.pwr_cap_multi }}"
profile_microwatts: "{{ power_max | float * profile_multi | float }}" profile_microwatts: "{{ power_max | float * profile_multi | float }}"
profile_watts: "{{ profile_microwatts | int/1000000 }}" profile_watts: "{{ profile_microwatts | int / 1000000 }}"

View file

@ -0,0 +1,6 @@
---
# handlers file for tuned_amdgpu
# Handler: restarts the tuned service. Triggered via `notify: Restart tuned`
# (the notify string must match this name exactly, including case).
- name: Restart tuned
  ansible.builtin.service:
    state: restarted
    name: tuned

View file

@ -0,0 +1,103 @@
---
# tasks file for tuned_amdgpu
#
# Populate ansible_facts.packages so later conditions can test whether
# 'tuned' / 'power-profiles-daemon' are installed.
- name: Gather package facts
  ansible.builtin.package_facts:
    manager: "auto"
# power-profiles-daemon conflicts with tuned; since Fedora 35 it must be
# removed so that tuned may be installed.
# The 'dnf' module in Ansible doesn't support 'swap', so the removal and the
# install are performed as two loop iterations of the generic package module.
- name: Replace 'power-profiles-daemon' with 'tuned' on Fedora 35+
  become: true
  ansible.builtin.package:
    name: "{{ item.name }}"
    state: "{{ item.state }}"
  loop:
    - name: power-profiles-daemon
      state: absent
    - name: tuned
      state: present
  when:
    - ('power-profiles-daemon' in ansible_facts.packages) or ('tuned' not in ansible_facts.packages)
    - ansible_distribution == 'Fedora'
    # '>=' (not '>') so Fedora 35 itself is covered, as the task name states
    - ansible_distribution_major_version | int >= 35
  # registered so the follow-up install task can tell whether the swap ran
  register: fed_ppdtuned_swap
# Fallback install for hosts where the Fedora swap did not run.
# A skipped task still registers its variable, so 'is not defined' alone does
# not detect a skipped swap; 'is skipped' is checked as well. The
# 'is not defined' clause is kept for runs where the swap task is excluded
# entirely (e.g. by tags) and nothing is registered.
# Installing an already-present package is a no-op, so the extra cases are safe.
- name: Install tuned
  become: true
  ansible.builtin.package:
    name: tuned
    state: present
  when: >-
    (fed_ppdtuned_swap is not defined)
    or (fed_ppdtuned_swap is skipped)
    or ('tuned' not in ansible_facts.packages)
# Picks the last 'cardN' (after sorting) among DRM connectors whose status
# file reports "connected"; the result is read by later tasks as card.stdout.
# The grep pattern is quoted so the shell cannot glob-expand 'card[0-9]'
# against files in the working directory.
# Read-only probe, hence changed_when: false.
# Fails early when no connected card is found, instead of letting later tasks
# operate on an empty card name.
- name: Determine GPU device in drm subsystem
  ansible.builtin.shell:
    cmd: grep -ls ^connected /sys/class/drm/*/status | grep -o 'card[0-9]' | sort | uniq | sort -h | tail -1
    executable: /bin/bash
  changed_when: false
  failed_when: card.rc != 0 or (card.stdout | trim | length == 0)
  register: card
# Recursively search the card's hwmon directory for the file named exactly
# 'power1_cap_max'; matches are registered as hwmon.files.
- name: Find hwmon/max power capability file for {{ card.stdout }}
  ansible.builtin.find:
    paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
    patterns: ['^power1_cap_max$']
    use_regex: true
    recurse: true
    file_type: file
  register: hwmon
# Recursively search the card's hwmon directory for the file named exactly
# 'power1_cap'; matches are registered as powercap_set.files.
- name: Find hwmon/current power limit file for {{ card.stdout }}
  ansible.builtin.find:
    paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
    patterns: ['^power1_cap$']
    use_regex: true
    recurse: true
    file_type: file
  register: powercap_set
# Read the first power1_cap_max file found earlier (hwmon.files).
# slurp returns the file content base64-encoded in power_max_b64.content.
- name: Get max power capability for {{ card.stdout }}
  ansible.builtin.slurp:
    src: "{{ hwmon.files[0].path }}"
  register: power_max_b64
# One directory per (amdgpu profile x base tuned profile) combination,
# e.g. /etc/tuned/balanced-amdgpu-VR.
# Loop items: item.0 is a {key, value} entry of amdgpu_profiles,
# item.1 is a name from base_profiles.
# dict2items always yields a list, so the loop also works when
# amdgpu_profiles holds a single entry (lookup('dict', ...) without wantlist
# collapses a one-element result to a bare dict).
- name: Create custom profile directories
  become: true
  ansible.builtin.file:
    state: directory
    path: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}
    mode: "0755"
  loop: "{{ amdgpu_profiles | dict2items | product(base_profiles) | list }}"
# Render amdgpu-clock.sh into every generated profile directory
# (one per amdgpu profile x base tuned profile combination).
# Loop items: item.0 is a {key, value} entry of amdgpu_profiles,
# item.1 is a name from base_profiles.
# dict2items always yields a list, so the loop also works when
# amdgpu_profiles holds a single entry (lookup('dict', ...) without wantlist
# collapses a one-element result to a bare dict).
- name: Template AMDGPU control/reset scripts
  become: true
  ansible.builtin.template:
    src: templates/amdgpu-clock.sh.j2
    dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/amdgpu-clock.sh
    owner: root
    group: root
    mode: "0755"  # script must be executable
  loop: "{{ amdgpu_profiles | dict2items | product(base_profiles) | list }}"
  notify: Restart tuned
# Render tuned.conf into every generated profile directory
# (one per amdgpu profile x base tuned profile combination).
# Loop items: item.0 is a {key, value} entry of amdgpu_profiles,
# item.1 is a name from base_profiles.
# dict2items always yields a list, so the loop also works when
# amdgpu_profiles holds a single entry (lookup('dict', ...) without wantlist
# collapses a one-element result to a bare dict).
- name: Template custom tuned profiles
  become: true
  ansible.builtin.template:
    src: templates/tuned.conf.j2
    dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/tuned.conf
    owner: root
    group: root
    mode: "0644"
  loop: "{{ amdgpu_profiles | dict2items | product(base_profiles) | list }}"
  notify: Restart tuned
# Enable the tuned service at boot. This task sets only 'enabled';
# it does not request a running state.
- name: Ensure tuned is enabled
  become: true
  ansible.builtin.service:
    enabled: true
    name: tuned