From b525797f8f93833952dea3549f0255c685a71164 Mon Sep 17 00:00:00 2001
From: Josh Lay
Date: Sun, 27 Nov 2022 00:51:39 -0600
Subject: [PATCH] role-ify

---
 host_vars/localhost.yml                       |  49 ------
 playbook.yml                                  | 143 +++++-------------
 .../tuned_amdgpu/defaults/main.yml            |   9 +-
 roles/tuned_amdgpu/handlers/main.yml          |   6 +
 roles/tuned_amdgpu/tasks/main.yml             | 103 +++++++++++++
 .../templates}/amdgpu-clock.sh.j2             |   0
 .../tuned_amdgpu/templates}/tuned.conf.j2     |   0
 7 files changed, 155 insertions(+), 155 deletions(-)
 delete mode 100644 host_vars/localhost.yml
 rename group_vars/all => roles/tuned_amdgpu/defaults/main.yml (58%)
 create mode 100644 roles/tuned_amdgpu/handlers/main.yml
 create mode 100644 roles/tuned_amdgpu/tasks/main.yml
 rename {templates => roles/tuned_amdgpu/templates}/amdgpu-clock.sh.j2 (100%)
 rename {templates => roles/tuned_amdgpu/templates}/tuned.conf.j2 (100%)

diff --git a/host_vars/localhost.yml b/host_vars/localhost.yml
deleted file mode 100644
index d354ee1..0000000
--- a/host_vars/localhost.yml
+++ /dev/null
@@ -1,49 +0,0 @@
----
-# statically defined mapping of gpu profiles
-# nets adjusted tuned profiles based on the contents in /sys/class/drm/card*/device/pp_power_profile_mode
-# adds a power multiplier, see comments below for more info
-# more reference driver profiles may be added, but do not remove default.
-amdgpu_profiles:
-  default:
-    pwrmode: 0
-    pwr_cap_multi: 0.789473684210526 # 255W - default
-  3D:
-    pwrmode: 1
-    pwr_cap_multi: 0.789473684210526 # 255W - default
-  VR:
-    pwrmode: 4
-    pwr_cap_multi: 0.789473684210526 # 255W - default
-  compute:
-    pwrmode: 5
-    pwr_cap_multi: 0.789473684210526 # 255W - default
-  custom:
-    pwrmode: 6
-    pwr_cap_multi: 0.869969040247678 # 281W - slight boost
-
-# pwr_cap_multi is multiplier against *board power capability* to determine power limits on the associated profile
-# 0.5 = 50%, 1.0 = 100% (of card power capability, not stock limits)
-#
-# should not exceed 1.0, must be a float. driver will do some rounding/stepping
-#
-# see 'power_max multi tab calculator.ods' for a calculator/spreadsheet
-# adjust cell F14 (board max power) and the 'effective watts' column to update calculations
-# microWatt board power capability can be discovered like so: 'cat /sys/class/drm/card*/device/hwmon/hwmon*/power1_cap_max'
-# power_max_default_multi: 0.75 # 242.25W, slightly lower than true default
-#
-# minimum/maximum GPU clocks using 'powerplay' below
-# these do *not* apply to the resulting 'amdgpu-default' tuned profile
-#
-# only the others (eg: 'VR'/'custom') under amdgpu_profiles (below)
-# effective clocks are dynamically adjusted by the driver in this range - based on utilization
-# can find baseline values in the '/sys/class/drm/{{ card }}/device/pp_od_clk_voltage' file
-# OD_RANGE indicates acceptable value ranges for SCLK (core) and MCLK (memory)
-# Under 'OD_SCLK' and 'OD_MCLK' you will see 0/1. These are the minimum/maximum values for the respective component.
-gpu_clock_min: "500" # default 500
-gpu_clock_max: "2500" # default 2529
-#
-# memory clocks are static, we only set a 'max' - high-refresh rate display quirk
-# this allows the memory clock to be adjusted
-gpumem_clock_max: "1050"
-#
-# optional, applies offset to GPU voltage, eg: '+100' = to boost GPU core voltage 100mV or 0.1V. for the 'custom' GPU profile.
-# gpu_mv_offset: "+50"
diff --git a/playbook.yml b/playbook.yml
index 1935642..867a4b2 100644
--- a/playbook.yml
+++ b/playbook.yml
@@ -1,105 +1,44 @@
 ---
 - hosts: localhost
   name: "Configure 'tuned' with AMD GPU control"
-  become: true
-  vars:
-    # list of source tuned profiles available on Fedora (TODO: should dynamically discover)
-    # further modified with AMD GPU power/clock parameters, creating new profiles. eg: 'balanced-amdgpu-VR'
-    base_profiles:
-      - balanced
-      - desktop
-      - latency-performance
-      - network-latency
-      - network-throughput
-      - powersave
-      - virtual-host
-  handlers:
-    - name: Restart tuned
-      ansible.builtin.service:
-        name: tuned
-        state: restarted
-  tasks:
-    - name: Gather package facts
-      ansible.builtin.package_facts:
-        manager: auto
-    - name: Replace 'power-profiles-daemon' with 'tuned' on Fedora 35+
-      ansible.builtin.package: # use with_items/pkg since 'dnf' module in Ansible doesn't support 'swap'
-        name: "{{ item.name }}"
-        state: "{{ item.state }}"
-      with_items:
-        - {name: 'power-profiles-daemon', state: 'absent'}
-        - {name: 'tuned', state: 'present'}
-      when:
-        - ('power-profiles-daemon' in ansible_facts.packages) or ('tuned' not in ansible_facts.packages)
-        - ansible_distribution == 'Fedora'
-        - ansible_distribution_major_version|int > 35
-      register: fed_ppdtuned_swap
-    # 'power-profiles-daemon' was added/conflicts with 'tuned' since F35
-    # otherwise, ensure the 'tuned' package is installed
-    - name: Install tuned
-      ansible.builtin.package:
-        name: tuned
-        state: present
-      when: (fed_ppdtuned_swap is not defined) or ('tuned' not in ansible_facts.packages)
-    - name: Determine GPU device in drm subsystem
-      ansible.builtin.shell:
-        cmd: grep -ls ^connected /sys/class/drm/*/status | grep -o card[0-9] | sort | uniq | sort -h | tail -1
-        executable: /bin/bash
-      changed_when: false
-      register: card
-    - name: Find hwmon/max power capability file for {{ card.stdout }}
-      ansible.builtin.find:
-        paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
-        file_type: file
-        recurse: true
-        use_regex: true
-        patterns:
-          - '^power1_cap_max$'
-      register: hwmon
-    - name: Find hwmon/current power limit file for {{ card.stdout }}
-      ansible.builtin.find:
-        paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
-        file_type: file
-        recurse: true
-        use_regex: true
-        patterns:
-          - '^power1_cap$'
-      register: powercap_set
-    - name: Get max power capability for {{ card.stdout }}
-      ansible.builtin.slurp:
-        src: "{{ hwmon.files.0.path }}"
-      register: power_max_b64
-    - name: Create custom profile directories
-      ansible.builtin.file:
-        state: directory
-        path: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}
-        mode: "0755"
-      with_nested:
-        - "{{ lookup('dict', amdgpu_profiles) }}"
-        - "{{ base_profiles }}"
-    - name: Template AMDGPU control/reset scripts
-      ansible.builtin.template:
-        src: templates/amdgpu-clock.sh.j2
-        dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/amdgpu-clock.sh
-        owner: root
-        group: root
-        mode: "0755"
-      with_nested:
-        - "{{ lookup('dict', amdgpu_profiles) }}"
-        - "{{ base_profiles }}"
-      notify: restart tuned
-    - name: Template custom tuned profiles
-      ansible.builtin.template:
-        src: templates/tuned.conf.j2
-        dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/tuned.conf
-        owner: root
-        group: root
-        mode: "0644"
-      with_nested:
-        - "{{ lookup('dict', amdgpu_profiles) }}"
-        - "{{ base_profiles }}"
-      notify: restart tuned
-    - name: Ensure tuned is enabled
-      ansible.builtin.service:
-        name: tuned
-        enabled: true
+  roles:
+    # role prepares/modifies 'tuned' with AMD GPU power/clock parameters
+    # creates a new tuned profile made for each permutation of (base) 'tuned' profile + AMD powerplay profile
+    - role: tuned_amdgpu
+      # GPU *core* min/max (dynamic) clock, only applies to non-default amdgpu profiles
+      gpu_clock_min: "750" # default 500
+      gpu_clock_max: "2600" # default 2529
+      # GPU memory *static* clock, should also only apply for non-default amdgpu profiles
+      gpumem_clock_max: "1050"
+      # optional, applies offset to GPU voltage, eg: '+100' = to boost GPU core voltage 100mV or 0.1V. for the 'custom' GPU profile.
+      # gpu_mv_offset: "+50"
+      # list of source tuned profiles available on Fedora (TODO: should dynamically discover)
+      base_profiles:
+        - balanced
+        - desktop
+        - latency-performance
+        - network-latency
+        - network-throughput
+        - powersave
+        - virtual-host
+      # mapping of typical Navi generation power profiles from:
+      # /sys/class/drm/card*/device/pp_power_profile_mode
+      # ref: https://www.kernel.org/doc/html/v4.20/gpu/amdgpu.html#pp-power-profile-mode
+      amdgpu_profiles:
+        default:
+          pwrmode: 0
+          pwr_cap_multi: 0.789473684210526 # 255W - default
+        3D:
+          pwrmode: 1
+          pwr_cap_multi: 0.789473684210526 # 255W - default
+        VR:
+          pwrmode: 4
+          pwr_cap_multi: 0.789473684210526 # 255W - default
+        compute:
+          pwrmode: 5
+          pwr_cap_multi: 0.789473684210526 # 255W - default
+        custom:
+          pwrmode: 6
+          pwr_cap_multi: 0.869969040247678 # 281W - slight boost
+      # both dictionaries are merged to create new 'tuned' profiles. eg:
+      # 'balanced-amdgpu-default', 'balanced-amdgpu-3D', 'balanced-amdgpu-VR'
diff --git a/group_vars/all b/roles/tuned_amdgpu/defaults/main.yml
similarity index 58%
rename from group_vars/all
rename to roles/tuned_amdgpu/defaults/main.yml
index fe42e39..de80a28 100644
--- a/group_vars/all
+++ b/roles/tuned_amdgpu/defaults/main.yml
@@ -1,9 +1,10 @@
 ---
-# misc default vars handling unit conversion RE: power capabilities/limits
+# defaults file for tuned_amdgpu
 #
-# the discovered board limit for power capability (in microWatts)
+# vars handling unit conversion RE: power capabilities/limits
+# the discovered board limit for power capability; in microWatts, then converted
 power_max: "{{ power_max_b64['content'] | b64decode }}"
-board_watts: "{{ power_max|int/1000000 }}"
+board_watts: "{{ power_max | int / 1000000 }}"
 
 # internals for profile power calculations
 # item in the context of the with_nested loops in the play
@@ -11,4 +12,4 @@ profile_name: "{{ item.0.key }}"
 profile_percentage: "{{ (item.0.value.pwr_cap_multi * 100.0) | round(2) }}"
 profile_multi: "{{ item.0.value.pwr_cap_multi }}"
 profile_microwatts: "{{ power_max | float * profile_multi | float }}"
-profile_watts: "{{ profile_microwatts | int/1000000 }}"
+profile_watts: "{{ profile_microwatts | int / 1000000 }}"
diff --git a/roles/tuned_amdgpu/handlers/main.yml b/roles/tuned_amdgpu/handlers/main.yml
new file mode 100644
index 0000000..60384eb
--- /dev/null
+++ b/roles/tuned_amdgpu/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+# handlers file for tuned_amdgpu
+- name: Restart tuned
+  ansible.builtin.service:
+    name: tuned
+    state: restarted
diff --git a/roles/tuned_amdgpu/tasks/main.yml b/roles/tuned_amdgpu/tasks/main.yml
new file mode 100644
index 0000000..5f93274
--- /dev/null
+++ b/roles/tuned_amdgpu/tasks/main.yml
@@ -0,0 +1,103 @@
+---
+# tasks file for tuned_amdgpu
+#
+- name: Gather package facts
+  ansible.builtin.package_facts:
+    manager: auto
+
+# note: power-profiles-daemon conflicts with tuned
+# since F35 it must be removed so tuned may be installed
+- name: Replace 'power-profiles-daemon' with 'tuned' on Fedora 35+
+  ansible.builtin.package: # use with_items/pkg since 'dnf' module in Ansible doesn't support 'swap'
+    name: "{{ item.name }}"
+    state: "{{ item.state }}"
+  with_items:
+    - {name: 'power-profiles-daemon', state: 'absent'}
+    - {name: 'tuned', state: 'present'}
+  when:
+    - ('power-profiles-daemon' in ansible_facts.packages) or ('tuned' not in ansible_facts.packages)
+    - ansible_distribution == 'Fedora'
+    - ansible_distribution_major_version | int >= 35  # inclusive: the conflict exists since F35 itself
+  register: fed_ppdtuned_swap
+  become: true
+
+- name: Install tuned
+  ansible.builtin.package:
+    name: tuned
+    state: present
+  when: (fed_ppdtuned_swap is skipped) or ('tuned' not in ansible_facts.packages)  # 'register' defines the var even when skipped
+  become: true
+
+- name: Determine GPU device in drm subsystem
+  ansible.builtin.shell:
+    cmd: grep -ls '^connected' /sys/class/drm/*/status | grep -o 'card[0-9]' | sort | uniq | sort -h | tail -1  # patterns quoted to avoid shell globbing
+    executable: /bin/bash
+  changed_when: false
+  register: card
+
+- name: Find hwmon/max power capability file for {{ card.stdout }}
+  ansible.builtin.find:
+    paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
+    file_type: file
+    recurse: true
+    use_regex: true
+    patterns:
+      - '^power1_cap_max$'
+  register: hwmon
+
+- name: Find hwmon/current power limit file for {{ card.stdout }}
+  ansible.builtin.find:
+    paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
+    file_type: file
+    recurse: true
+    use_regex: true
+    patterns:
+      - '^power1_cap$'
+  register: powercap_set
+
+- name: Get max power capability for {{ card.stdout }}
+  ansible.builtin.slurp:
+    src: "{{ hwmon.files.0.path }}"
+  register: power_max_b64
+
+- name: Create custom profile directories
+  ansible.builtin.file:
+    state: directory
+    path: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}
+    mode: "0755"
+  with_nested:
+    - "{{ lookup('dict', amdgpu_profiles) }}"
+    - "{{ base_profiles }}"
+  become: true
+
+- name: Template AMDGPU control/reset scripts
+  ansible.builtin.template:
+    src: templates/amdgpu-clock.sh.j2
+    dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/amdgpu-clock.sh
+    owner: root
+    group: root
+    mode: "0755"
+  with_nested:
+    - "{{ lookup('dict', amdgpu_profiles) }}"
+    - "{{ base_profiles }}"
+  notify: Restart tuned
+  become: true
+
+- name: Template custom tuned profiles
+  ansible.builtin.template:
+    src: templates/tuned.conf.j2
+    dest: /etc/tuned/{{ item.1 }}-amdgpu-{{ item.0.key }}/tuned.conf
+    owner: root
+    group: root
+    mode: "0644"
+  with_nested:
+    - "{{ lookup('dict', amdgpu_profiles) }}"
+    - "{{ base_profiles }}"
+  notify: Restart tuned
+  become: true
+
+- name: Ensure tuned is enabled
+  ansible.builtin.service:
+    name: tuned
+    enabled: true
+  become: true
diff --git a/templates/amdgpu-clock.sh.j2 b/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2
similarity index 100%
rename from templates/amdgpu-clock.sh.j2
rename to roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2
diff --git a/templates/tuned.conf.j2 b/roles/tuned_amdgpu/templates/tuned.conf.j2
similarity index 100%
rename from templates/tuned.conf.j2
rename to roles/tuned_amdgpu/templates/tuned.conf.j2