role-ify
This commit is contained in:
parent
23f0ad541d
commit
b525797f8f
7 changed files with 155 additions and 155 deletions
15
roles/tuned_amdgpu/defaults/main.yml
Normal file
15
roles/tuned_amdgpu/defaults/main.yml
Normal file
|
@ -0,0 +1,15 @@
|
|||
---
|
||||
# defaults file for tuned_amdgpu
|
||||
#
|
||||
# Unit handling for the board's power capability/limits.
# 'power_max_b64' is the slurp result registered by the role's tasks for the
# discovered power1_cap_max file; the decoded reading is in microWatts and is
# converted to Watts for display.
power_max: "{{ power_max_b64['content'] | b64decode }}"
board_watts: "{{ (power_max | int) / 1000000 }}"

# Internals for the per-profile power calculations.
# 'item' is the current element of the with_nested loops in the tasks:
# item.0 is one 'amdgpu_profiles' entry (key/value), item.1 a base profile.
profile_name: "{{ item.0.key }}"
profile_percentage: "{{ (item.0.value.pwr_cap_multi * 100.0) | round(2) }}"
profile_multi: "{{ item.0.value.pwr_cap_multi }}"
profile_microwatts: "{{ (power_max | float) * (profile_multi | float) }}"
profile_watts: "{{ (profile_microwatts | int) / 1000000 }}"
|
6
roles/tuned_amdgpu/handlers/main.yml
Normal file
6
roles/tuned_amdgpu/handlers/main.yml
Normal file
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
# handlers file for tuned_amdgpu
|
||||
# Restarts the tuned service; notified by the role's templating tasks.
- name: Restart tuned
  ansible.builtin.service:
    name: tuned
    state: restarted
  # Managing the service needs root. The role's tasks escalate per task, so
  # the handler does the same instead of relying on play-level 'become'.
  become: true
|
103
roles/tuned_amdgpu/tasks/main.yml
Normal file
103
roles/tuned_amdgpu/tasks/main.yml
Normal file
|
@ -0,0 +1,103 @@
|
|||
---
|
||||
# tasks file for tuned_amdgpu
|
||||
#
|
||||
# Collect the installed-package inventory into ansible_facts.packages;
# the package tasks of this role consult it in their 'when' conditions.
- name: Gather package facts
  ansible.builtin.package_facts:
    manager: "auto"
|
||||
|
||||
# note: power-profiles-daemon conflicts with tuned;
# since F35 it must be removed so tuned may be installed.
# The 'dnf' module in Ansible doesn't support 'swap', so the generic package
# module is looped over a remove-then-install pair instead.
- name: Replace 'power-profiles-daemon' with 'tuned' on Fedora 35+
  ansible.builtin.package:
    name: "{{ item.name }}"
    state: "{{ item.state }}"
  loop:
    - name: power-profiles-daemon
      state: absent
    - name: tuned
      state: present
  when:
    - ansible_distribution == 'Fedora'
    # '>=' so that Fedora 35 itself is covered, as the task name states
    - ansible_distribution_major_version | int >= 35
    - ('power-profiles-daemon' in ansible_facts.packages) or ('tuned' not in ansible_facts.packages)
  register: fed_ppdtuned_swap
  become: true
|
||||
|
||||
# Install tuned wherever the package facts show it missing.
# A registered variable is defined even when its task was skipped, so the
# former "fed_ppdtuned_swap is not defined" clause could never be true and has
# been dropped; the remaining check is what actually decided the outcome.
- name: Install tuned
  ansible.builtin.package:
    name: tuned
    state: present
  when: "'tuned' not in ansible_facts.packages"
  become: true
|
||||
|
||||
# Pick the highest-numbered DRM card that has a connector reporting
# 'connected'; the result (eg: card0) is used to build the sysfs paths of the
# following tasks.
- name: Determine GPU device in drm subsystem
  ansible.builtin.shell:
    # the 'card[0-9]' pattern is quoted so the shell cannot glob-expand it
    # against files in the working directory
    cmd: >-
      grep -ls ^connected /sys/class/drm/*/status
      | grep -o 'card[0-9]'
      | sort | uniq | sort -h | tail -1
    executable: /bin/bash
  register: card
  changed_when: false
  # the pipeline's exit status is that of 'tail', so an empty result has to be
  # caught explicitly; otherwise later tasks build paths with no card in them
  failed_when: card.rc != 0 or (card.stdout | trim | length) == 0
|
||||
|
||||
# Search the card's hwmon tree for the 'power1_cap_max' file.
- name: Find hwmon/max power capability file for {{ card.stdout }}
  ansible.builtin.find:
    paths: "/sys/class/drm/{{ card.stdout }}/device/hwmon"
    patterns:
      - '^power1_cap_max$'
    use_regex: true
    recurse: true
    file_type: file
  register: hwmon
|
||||
|
||||
# Search the card's hwmon tree for the 'power1_cap' file; its path is written
# into the templated control script as the power limit target.
- name: Find hwmon/current power limit file for {{ card.stdout }}
  ansible.builtin.find:
    paths: "/sys/class/drm/{{ card.stdout }}/device/hwmon"
    patterns:
      - '^power1_cap$'
    use_regex: true
    recurse: true
    file_type: file
  register: powercap_set
|
||||
|
||||
# Read the first 'power1_cap_max' match. The registered result carries the
# file body base64-encoded under 'content'; the role defaults decode it.
- name: Get max power capability for {{ card.stdout }}
  ansible.builtin.slurp:
    src: "{{ hwmon.files[0].path }}"
  register: power_max_b64
|
||||
|
||||
# One directory per (AMDGPU profile, base tuned profile) combination:
#   /etc/tuned/<base profile>-amdgpu-<AMDGPU profile>
- name: Create custom profile directories
  become: true
  ansible.builtin.file:
    path: "/etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}"
    state: directory
    mode: "0755"
  with_nested:
    - "{{ lookup('dict', amdgpu_profiles) }}"
    - "{{ base_profiles }}"
|
||||
|
||||
# Render the GPU control script into every custom profile directory
# (one per AMDGPU profile / base profile combination); tuned is restarted via
# the handler when a script changes.
- name: Template AMDGPU control/reset scripts
  ansible.builtin.template:
    # resolved against the role's own 'templates/' directory; no
    # playbook-relative 'templates/' prefix is needed inside a role
    src: amdgpu-clock.sh.j2
    dest: "/etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/amdgpu-clock.sh"
    owner: root
    group: root
    mode: "0755"
  with_nested:
    - "{{ lookup('dict', amdgpu_profiles) }}"
    - "{{ base_profiles }}"
  notify: Restart tuned
  become: true
|
||||
|
||||
# Render tuned.conf into every custom profile directory
# (one per AMDGPU profile / base profile combination); tuned is restarted via
# the handler when a profile changes.
- name: Template custom tuned profiles
  ansible.builtin.template:
    # resolved against the role's own 'templates/' directory; no
    # playbook-relative 'templates/' prefix is needed inside a role
    src: tuned.conf.j2
    dest: "/etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/tuned.conf"
    owner: root
    group: root
    mode: "0644"
  with_nested:
    - "{{ lookup('dict', amdgpu_profiles) }}"
    - "{{ base_profiles }}"
  notify: Restart tuned
  become: true
|
||||
|
||||
# Only the 'enabled' flag of the service is managed here; restarts are left to
# the 'Restart tuned' handler.
- name: Ensure tuned is enabled
  become: true
  ansible.builtin.service:
    name: tuned
    enabled: true
|
72
roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2
Normal file
72
roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2
Normal file
|
@ -0,0 +1,72 @@
|
|||
#!/bin/bash
# script for tuned AMDGPU clock control
# configures GPU power/clock characteristics
# clocks/power in 3D are dynamic based on need/usage
#
# for 'amdgpu-default' tuned profiles, this will reset the characteristics to default
# for others this will apply overclocking settings -- leaving clock choices to the associated power profile (eg: VR)
#
# rendered by Ansible with environment-appropriate values:
#   per-profile power limit (microWatts) and power profile mode
#   GPU/memory clock limits and the optional voltage offset
#   fallback path to the power limit file discovered at deploy time
#
# AMDGPU driver/sysfs references:
#   https://01.org/linuxgraphics/gfx-docs/drm/gpu/amdgpu.html
#   https://docs.kernel.org/gpu/amdgpu/thermal.html

{# done this way to avoid issues with the card number possibly shifting after playbook run #}
# dynamically determine the connected GPU using the DRM subsystem
CARD=$(/usr/bin/grep -ls ^connected /sys/class/drm/*/status | /usr/bin/grep -o 'card[0-9]' | /usr/bin/sort | /usr/bin/uniq | /usr/bin/sort -h | /usr/bin/tail -1)

# without a detected card every sysfs path below would be malformed
if [ -z "${CARD}" ]; then
  echo 'amdgpu-clock: no connected DRM card found' >&2
  exit 1
fi

{# the hwmon path embeds the card/hwmon numbers, so it is resolved at run time for the same reason as CARD #}
# locate the power limit file for the detected card
POWER_CAP=$(/usr/bin/find "/sys/class/drm/${CARD}/device/hwmon" -type f -name 'power1_cap' 2>/dev/null | /usr/bin/head -n 1)

# fall back to the path Ansible discovered when the script was rendered
if [ -z "${POWER_CAP}" ]; then
  POWER_CAP='{{ powercap_set.files.0.path }}'
fi

{# begin the templated script for 'default' profiles to reset state #}
{% if 'default' in profile_name %}
# set power state transition heuristics to default
echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode

# set control mode back to auto
# attempts to dynamically set optimal power profile for (load) conditions
echo 'auto' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level

# reset any existing profile clock changes
echo 'r' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# give '{{ profile_name }}' profile ~{{ profile_percentage }}% (rounded) of the max power capability
# {{ profile_watts }} Watts of {{ board_watts }} total
echo '{{ profile_microwatts | int }}' | tee "${POWER_CAP}"
{% else %}
{# begin the templated script for non-default AMD GPU profiles, eg: 'VR' or '3D_FULL_SCREEN' #}
# set manual control mode
# allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files
# only interested in 'pp_power_profile_mode' for power and 'pp_dpm_mclk' for memory clock (flickering).
# GPU clocks are dynamic based on (load) condition
echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level

# set power state transition heuristics to '{{ profile_name }}' profile
echo '{{ item.0.value.pwrmode }}' | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode

# give '{{ profile_name }}' profile ~{{ profile_percentage }}% (rounded) of the max power capability
# {{ profile_watts }} Watts of {{ board_watts }} total
echo '{{ profile_microwatts | int }}' | tee "${POWER_CAP}"

# set the minimum GPU clock
echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# set the maximum GPU clock
echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# set the maximum GPU *memory* clock
echo 'm 1 {{ gpumem_clock_max }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
{% if gpu_mv_offset is defined %}

# offset GPU voltage {{ gpu_mv_offset }}mV
echo 'vo {{ gpu_mv_offset }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
{% endif %}

# commit the changes
echo 'c' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# force GPU memory into highest clock (fix flickering)
# pp_dpm_*clk settings are unintuitive, giving profiles that may be used
# opt not to set the others (eg: sclk/fclk) - those should remain for benefits from the curve
echo '3' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_mclk
{% endif %}
|
22
roles/tuned_amdgpu/templates/tuned.conf.j2
Normal file
22
roles/tuned_amdgpu/templates/tuned.conf.j2
Normal file
|
@ -0,0 +1,22 @@
|
|||
[main]
include={{ item.1 }}
summary={{ item.1 }} + TCP/RAID tweaks + AMDGPU pp_power_profile_mode = {{ item.0.value.pwrmode }} ({{ item.0.key }})

[sysctl]
# TCP: queueing discipline, congestion control and socket buffer ceilings
net.core.default_qdisc=fq
# note: 'bbr2' needs a [modified] kernel that supports it - stock Fedora kernels (currently) do *not*
# eg: 'kernel-xanmod-edge' from COPR 'rmnscnce/kernel-xanmod'
net.ipv4.tcp_congestion_control=bbr2
net.core.rmem_max=33554432
net.core.wmem_max=33554432
# RAID speed limits
dev.raid.speed_limit_min=600000
dev.raid.speed_limit_max=9000000
# raised so that some games are able to run (eg: DayZ)
vm.max_map_count=1048576

# run the AMDGPU control script that is rendered next to this profile
# a script is used because some of the sysfs interfaces are transactional
# and cannot be set by a single param in 'tuned'/[sysfs]
[gpuclockscript]
type=script
script=${i:PROFILE_DIR}/amdgpu-clock.sh
|
Loading…
Add table
Add a link
Reference in a new issue