From c833a9a36e73918a7c709b36c498b8c98a0c4a5f Mon Sep 17 00:00:00 2001
From: Josh Lay
Date: Sat, 3 Jun 2023 17:40:20 -0500
Subject: [PATCH] refactor, two profiles

---
 README.md                                     |  5 +-
 playbook.yml                                  | 42 +++------
 roles/tuned_amdgpu/defaults/main.yml          | 16 ----
 roles/tuned_amdgpu/tasks/main.yml             | 32 -------
 .../tuned_amdgpu/templates/amdgpu-clock.sh.j2 | 85 ++++++++++++++-----
 roles/tuned_amdgpu/templates/tuned.conf.j2    |  8 +-
 6 files changed, 82 insertions(+), 106 deletions(-)

diff --git a/README.md b/README.md
index d59c9e9..95fd9a8 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,9 @@ Hacky solution to integrate AMDGPU power control and overclocking in `tuned` wit
 Takes a list of existing `tuned` profiles and creates new ones based on them. These new profiles include AMDGPU power/clock parameters
 
 An attempt is made to discover the active GPU using the 'connected' state in the `DRM` subsystem, example:
-```
-$ grep -ls ^connected /sys/class/drm/*/status | grep -o card[0-9] | sort | uniq | sort -h | tail -1
+
+```bash
+~ $ grep -ls ^connected /sys/class/drm/*/status | grep -o card[0-9] | sort | uniq | sort -h | tail -1
 card1
 ```
 
diff --git a/playbook.yml b/playbook.yml
index e710f7b..ec0ffab 100644
--- a/playbook.yml
+++ b/playbook.yml
@@ -7,15 +7,22 @@
     - role: tuned_amdgpu
       # note: 'gpu_*' vars only apply with the 'custom' suffixed profiles created by this tooling
       # profiles based on the 'default' amdgpu power profile mode use default clocks
-      gpu_clock_min: "500" # default 500
-      gpu_clock_max: "2615" # default 2529
+      #
+      # the connected AMD GPU is discovered automatically - assumes a single card
+      # when swapping to another AMD card, remove the stale profiles to avoid instability:
+      # 'rm -rfv /etc/tuned/*amdgpu*'
+      gpu_clock_min: "2200" # default 500; for best performance keep this near the maximum - applies with the 'overclock' tuned profile
+      gpu_clock_max: "2725" # default somewhere around 2529 to 2660
       gpumem_clock_static: "1075"
       gpu_power_multi:
-        default: 0.789473684210526 # 255W - slightly reduced, maintains clocks well
-        overclock: 0.869969040247678 # 281W - real default, board supports up to 323W (1.0)
+        default: 0.869969040247678 # 281W - real default
+#        overclock: 0.928792569659443 # 300W - slight boost
+        overclock: 1.0 # 323W - full board capability
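+        # for reference: profile wattage = board power limit (power1_cap_max) x multiplier,
+        # eg: with this card's 323W cap, 0.869969... ~= 281W and 1.0 = the full 323W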
       # optional, applies offset (+/-) to GPU voltage by provided mV
       # gpu_mv_offset: "-25"
+      gpu_mv_offset: "+75" # adds 75mV or 0.075V
       # '-50' undervolts GPU core voltage 50mV or 0.05V
+      # mostly untested, there be dragons/instability
       #
       # list of source tuned profiles available on Fedora (TODO: should dynamically discover)
       base_profiles:
         - balanced
         - desktop
         - latency-performance
         - network-latency
         - network-throughput
         - powersave
         - virtual-host
-      #
-      # mapping of typical Navi generation power profiles from:
-      # /sys/class/drm/card*/device/pp_power_profile_mode
-      # ref: https://www.kernel.org/doc/html/v4.20/gpu/amdgpu.html#pp-power-profile-mode
-      # 'pwr_cap_multi' is multiplied against board *limit* to determine profile wattage; 0.5 = 50%
-      # values below reflect my 6900XT
-# amdgpu_profiles:
-#   default:
-#     pwrmode: 0
-#     pwr_cap_multi: 0.789473684210526 # 255W - slightly reduced, maintains clocks well
-#   3D:
-#     pwrmode: 1
-#     pwr_cap_multi: 0.869969040247678 # 281W - default
-#   powersave:
-#     pwrmode: 2
-#     pwr_cap_multi: 0.869969040247678
-#   VR:
-#     pwrmode: 4
-#     pwr_cap_multi: 0.869969040247678
-#   compute:
-#     pwrmode: 5
-#     pwr_cap_multi: 0.869969040247678
-#   custom:
-#     pwrmode: 6
-#     pwr_cap_multi: 1.0 # 323W - full capability
-      # both dictionaries are merged to create new 'tuned' profiles. eg:
-      # 'balanced-amdgpu-default', 'balanced-amdgpu-3D', 'balanced-amdgpu-video'
diff --git a/roles/tuned_amdgpu/defaults/main.yml b/roles/tuned_amdgpu/defaults/main.yml
index aa86c15..14a9581 100644
--- a/roles/tuned_amdgpu/defaults/main.yml
+++ b/roles/tuned_amdgpu/defaults/main.yml
@@ -1,27 +1,11 @@
 ---
 # defaults file for tuned_amdgpu
 #
-# vars handling unit conversion RE: power capabilities/limits
-# the discovered board limit for power capability; in microWatts, then converted
-power_max: "{{ power_max_b64['content'] | b64decode }}"
-board_watts: "{{ power_max | int / 1000000 }}"
 # internals for profile power calculations
 # item in the context of the with_nested loops in the play
 profile_name: "{{ item.0 }}"
 
-# determine percentage for human-friendly comments
-power_default_pct: "{{ (gpu_power_multi.default * 100.0) | round(2) }}"
-power_oc_pct: "{{ (gpu_power_multi.overclock * 100.0) | round(2) }}"
-
-# in microWatts, actually written to sysfs
-power_default_mw: "{{ (power_max | float) * (gpu_power_multi.default | float) }}"
-power_oc_mw: "{{ (power_max | float) * (gpu_power_multi.overclock | float) }}"
-
-# wattages - more human-friendly comments
-power_default_watts: "{{ (power_default_mw | int) / 1000000 }}"
-power_oc_watts: "{{ (power_oc_mw | int) / 1000000 }}"
-
 amdgpu_profiles:
   - default
   - overclock
diff --git a/roles/tuned_amdgpu/tasks/main.yml b/roles/tuned_amdgpu/tasks/main.yml
index c428dda..a7bc037 100644
--- a/roles/tuned_amdgpu/tasks/main.yml
+++ b/roles/tuned_amdgpu/tasks/main.yml
@@ -28,38 +28,6 @@
   when: (fed_ppdtuned_swap is not defined) or ('tuned' not in ansible_facts.packages)
   become: true
 
-- name: Determine GPU device in drm subsystem
-  ansible.builtin.shell:
-    cmd: grep -ls ^connected /sys/class/drm/*/status | grep -o card[0-9] | sort | uniq | sort -h | tail -1
-    executable: /bin/bash
-  changed_when: false
-  register: card
-
-- name: Find hwmon/max power capability file for {{ card.stdout }}
-  ansible.builtin.find:
-    paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
-    file_type: file
-    recurse: true
-    use_regex: true
-    patterns:
-      - '^power1_cap_max$'
-  register: hwmon
-
-- name: Find hwmon/current power limit file for {{ card.stdout }}
-  ansible.builtin.find:
-    paths: /sys/class/drm/{{ card.stdout }}/device/hwmon
-    file_type: file
-    recurse: true
-    use_regex: true
-    patterns:
-      - '^power1_cap$'
-  register: powercap_set
-
-- name: Get max power capability for {{ card.stdout }}
-  ansible.builtin.slurp:
-    src: "{{ hwmon.files.0.path }}"
-  register: power_max_b64
-
 - name: Create custom profile directories
   ansible.builtin.file:
     state: directory
diff --git a/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2 b/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2
index 28af80a..cc2dd2a 100644
--- a/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2
+++ b/roles/tuned_amdgpu/templates/amdgpu-clock.sh.j2
@@ -18,6 +18,29 @@
 # dynamically determine the connected GPU using the DRM subsystem
 CARD=$(/usr/bin/grep -ls ^connected /sys/class/drm/*/status | /usr/bin/grep -o 'card[0-9]' | /usr/bin/sort | /usr/bin/uniq | /usr/bin/sort -h | /usr/bin/tail -1)
 
+function get_hwmon_dir() {
+  CARD_DIR="/sys/class/drm/${1}/device"
+  for CANDIDATE in "${CARD_DIR}"/hwmon/hwmon*; do
+    if [[ -f "${CANDIDATE}"/power1_cap ]]; then
+      # found a valid hwmon dir, return the first match
+      echo "${CANDIDATE}"
+      return
+    fi
+  done
+}
+
+# determine the hwmon directory
+HWMON_DIR=$(get_hwmon_dir "${CARD}")
+
+# read all of the power profiles, used to get the IDs for assignment later
+PROFILE_MODES=$(< /sys/class/drm/"${CARD}"/device/pp_power_profile_mode)
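+# example output (RX 6000 / Navi - exact columns vary by ASIC and kernel): one row per
+# profile, "<ID> <NAME> ...", eg: 0 BOOTUP_DEFAULT, 1 3D_FULL_SCREEN, 2 POWER_SAVING,
+# 4 VR, 5 COMPUTE, 6 CUSTOM; '*' marks the active profile and the leading ID is what
+# the awk extractions below print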
+
+# get power capability; used later to determine the profile power limits
+read -r -d '' POWER_CAP < "$HWMON_DIR"/power1_cap_max
+
+# enable THP; the profile also enables the 'vm.compaction_proactiveness' sysctl,
+# which improves allocation latency
+echo 'always' | tee /sys/kernel/mm/transparent_hugepage/enabled
+
 {# begin the templated script for 'default' profiles to reset state #}
 {% if 'default' in profile_name %}
 # set control mode back to auto
@@ -27,29 +50,27 @@ echo 'auto' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_le
 # reset any existing profile clock changes
 echo 'r' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
 
-# give '{{ profile_name }}' profile ~{{ power_default_pct }}% (rounded) of the max power capability
-# {{ power_default_watts }} Watts of {{ board_watts }} total
-echo '{{ power_default_mw | int }}' | tee '{{ powercap_set.files.0.path }}'
+# adjust power limit using multiplier against board capability
+POWER_LIM_DEFAULT=$(/usr/bin/awk -v m="$POWER_CAP" -v n={{ gpu_power_multi.default }} 'BEGIN {printf "%.0f", (m*n)}')
+echo "$POWER_LIM_DEFAULT" | tee "${HWMON_DIR}/power1_cap"
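+# eg: with this board's 323W cap, power1_cap_max reads 323000000 (microwatts);
+# 323000000 x 0.869969... ~= 281000000, ie: the ~281W 'default' limit noted in the playbook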
+
+# extract the power-saving profile ID number
+PROF_POWER_SAVING_NUM=$(/usr/bin/awk '$0 ~ /POWER_SAVING.*:/ {print $1}' <<< "$PROFILE_MODES")
+
+# reset power/clock heuristics to power-saving
+echo "${PROF_POWER_SAVING_NUM}" | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode
 
 {% else %}
 
-{# begin the templated script for non-default AMD GPU profiles, eg: 'VR' or '3D_FULL_SCREEN' #}
-# set manual control mode
-# allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files
-# only interested in 'pp_power_profile_mode' for power and 'pp_dpm_mclk' for memory clock (flickering).
-# GPU clocks are dynamic based on (load) condition
-#echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level
-
-# give '{{ profile_name }}' profile ~{{ power_oc_pct }}% (rounded) of the max power capability
-# {{ power_oc_watts }} Watts of {{ board_watts }} total
-echo '{{ power_oc_mw | int }}' | tee '{{ powercap_set.files.0.path }}'
-
-# set the minimum GPU clock
+{# begin the templated script for 'overclocked' AMD GPU profiles based on the existing tuned profiles #}
+# set the minimum GPU clock - for best performance, this should be near the maximum
+# RX6000 series power management *sucks*
 echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
 
 # set the maximum GPU clock
 echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
 
-# set the minimum / maximum GPU *memory* clock - force it high
-echo 'm 0 {{ gpumem_clock_static }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
+# set the maximum GPU *memory* clock
+# the minimum appears to be disregarded - memory sits at the lowest clock the overdrive (OD) range allows
+# and follows the core clock; with the 0/1 core limits set high enough, the memory clock follows suit
 echo 'm 1 {{ gpumem_clock_static }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
 
 {% if gpu_mv_offset is defined %}
@@ -60,12 +81,30 @@ echo 'vo {{ gpu_mv_offset }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_vo
 # commit the changes
 echo 'c' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
 
-# force GPU memory into highest clock (fix flickering)
-# pp_dpm_*clk settings are unintuitive, giving profiles that may be used
-# opt not to set the others (eg: sclk/fclk) - those should remain for benefits from the curve
-# echo '3' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_mclk
+# force GPU core and memory into highest clocks (fix flickering and poor power management)
+# set manual control mode
+# allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files
+echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level
 
-# note 4/8/2023: instead of 'manual'... deal with broken power management, force clocks to high
+# adjust power limit using multiplier against board capability
+POWER_LIM_OC=$(/usr/bin/awk -v m="$POWER_CAP" -v n={{ gpu_power_multi.overclock }} 'BEGIN {printf "%.0f", (m*n)}')
+echo "$POWER_LIM_OC" | tee "${HWMON_DIR}/power1_cap"
+
+# pp_dpm_*clk settings are unintuitive: each file lists clock levels, the written value picks the level index to force
+echo '1' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_sclk
+echo '3' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_mclk
+echo '2' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_fclk
+echo '2' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_socclk
+
+# extract the VR power profile ID number
+PROF_VR_NUM=$(/usr/bin/awk '$0 ~ /VR.*:/ {print $1}' <<< "$PROFILE_MODES")
+
+# force 'overclocked' profile to 'VR' power/clock heuristics
+# latency/frame timing seemed favorable with relatively-close minimum clocks
+echo "${PROF_VR_NUM}" | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode
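+
+# to verify after the profile applies: pp_dpm_*clk and pp_power_profile_mode should mark
+# the active level/profile with '*', eg: cat /sys/class/drm/"${CARD}"/device/pp_dpm_mclk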
+
+# note 4/8/2023: instead of 'manual'... try dealing with broken power management, force clocks to high
 # ref: https://gitlab.freedesktop.org/drm/amd/-/issues/1500
-echo 'high' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level
+# followup: doesn't work that well in practice, still flaky on clocks/frame times
+#echo 'high' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level
 {% endif %}
diff --git a/roles/tuned_amdgpu/templates/tuned.conf.j2 b/roles/tuned_amdgpu/templates/tuned.conf.j2
index ae98cd9..729e025 100644
--- a/roles/tuned_amdgpu/templates/tuned.conf.j2
+++ b/roles/tuned_amdgpu/templates/tuned.conf.j2
@@ -11,8 +11,12 @@ net.core.default_qdisc=fq
 net.ipv4.tcp_congestion_control=bbr2
 net.core.rmem_max=33554432
 net.core.wmem_max=33554432
-dev.raid.speed_limit_min=600000
-dev.raid.speed_limit_max=9000000
+dev.raid.speed_limit_min=1000000
+dev.raid.speed_limit_max=6000000
+# improve THP allocation latency, compact in background
+vm.compaction_proactiveness=30
+# make page lock theft slightly more fair
+vm.page_lock_unfairness=1
 # allow some games to run (eg: DayZ)
 vm.max_map_count=1048576
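
Once the play has run, the generated profiles are switched with `tuned-adm` like any other; assuming the `<base>-amdgpu-<default|overclock>` naming pattern implied by the playbook comments, for example:

```bash
# activate the overclocked variant of the 'balanced' base profile
sudo tuned-adm profile balanced-amdgpu-overclock
# confirm which profile is active
tuned-adm active
```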