#!/bin/bash
# script for tuned AMDGPU clock control
# configures GPU power/clock characteristics
# clocks/power in 3D are dynamic based on need/usage
#
# for 'amdgpu-default' tuned profiles, this will reset the characteristics to default
# for others this will apply overclocking settings -- leaving clock choices to the
# associated power profile (eg: VR)
#
# rendered by Ansible (Jinja2) with environment-appropriate values:
#   profile name, power limit multipliers, clock limits and the optional voltage offset
# discovered at runtime (not templated):
#   card #, eg: card0
#   path to the sysfs device files (power/clock/voltage control)
#
# AMDGPU driver/sysfs references:
#   https://01.org/linuxgraphics/gfx-docs/drm/gpu/amdgpu.html
#   https://docs.kernel.org/gpu/amdgpu/thermal.html

#######################################
# Locate the hwmon directory of a DRM card that exposes a power cap.
# Arguments: $1 - DRM card name, eg: card0
# Outputs:   path of the first hwmon directory containing 'power1_cap' (stdout)
# Returns:   0 when a directory was found, 1 otherwise
#######################################
get_hwmon_dir() {
  local card_dir="/sys/class/drm/${1}/device"
  local candidate

  for candidate in "${card_dir}"/hwmon/hwmon*; do
    if [[ -f "${candidate}/power1_cap" ]]; then
      # found a valid hwmon dir; stop here so the caller never receives several paths
      echo "${candidate}"
      return 0
    fi
  done

  # an unmatched glob stays literal and fails the -f test, so this is also the "no hwmon" path
  return 1
}

# enable THP; profile enables the 'vm.compaction_proactiveness' sysctl
# improves allocation latency
# done before GPU discovery: it does not depend on the GPU, so it still applies if none is found
echo 'always' | tee /sys/kernel/mm/transparent_hugepage/enabled

{# done this way to avoid issues with the card number possibly shifting after playbook run #}
# dynamically determine the connected GPU using the DRM subsystem
#   grep -ls : list the status files reporting 'connected', ignoring unreadable ones
#   grep -Eo : keep only the card name from each path (any number of digits)
#   sort -uV : de-duplicate and order numerically, so card10 sorts after card2
#   tail -1  : use the highest-numbered card
CARD=$(
  /usr/bin/grep -ls '^connected' /sys/class/drm/*/status \
    | /usr/bin/grep -Eo 'card[0-9]+' \
    | /usr/bin/sort -uV \
    | /usr/bin/tail -1
)

# every path below is derived from the card name; without it nothing can be configured
if [[ -z "${CARD}" ]]; then
  echo 'amdgpu clock control: no DRM card with a connected display was found' >&2
  exit 1
fi

# determine the hwmon directory (left empty when the card exposes no power cap)
HWMON_DIR=$(get_hwmon_dir "${CARD}")

# read all of the power profiles, used to get the IDs for assignment later
PROFILE_MODES=$(< /sys/class/drm/"${CARD}"/device/pp_power_profile_mode)

# get power capability; later used to determine limits
# 'read -d ""' consumes the whole file and reports non-zero at EOF; the value is still stored
POWER_CAP=''
if [[ -n "${HWMON_DIR}" ]]; then
  read -r -d '' POWER_CAP < "${HWMON_DIR}/power1_cap_max"
fi

# without a known capability the power limit is left alone; this avoids writing a bogus
# value (or a stray '/power1_cap' file) when the hwmon lookup came back empty
if [[ -z "${POWER_CAP}" ]]; then
  echo "amdgpu clock control: no usable power cap for ${CARD}; power limit left untouched" >&2
fi

{# begin the templated script for 'default' profiles to reset state #}
{% if 'default' in profile_name %}
# set control mode back to auto
# attempts to dynamically set optimal power profile for (load) conditions
echo 'auto' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level

# reset any existing profile clock changes
echo 'r' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# adjust power limit using multiplier against board capability
if [[ -n "${POWER_CAP}" ]]; then
  POWER_LIM_DEFAULT=$(/usr/bin/awk -v m="$POWER_CAP" -v n="{{ gpu_power_multi.default }}" 'BEGIN {printf "%.0f", (m*n)}')
  echo "$POWER_LIM_DEFAULT" | tee "${HWMON_DIR}/power1_cap"
fi

# extract the power-saving profile ID number
PROF_POWER_SAVING_NUM=$(/usr/bin/awk '$0 ~ /POWER_SAVING.*:/ {print $1}' <<< "$PROFILE_MODES")

# reset power/clock heuristics to power-saving
echo "${PROF_POWER_SAVING_NUM}" | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode
{% else %}
{# begin the templated script for 'overclocked' AMD GPU profiles based on the existing tuned profiles #}
# set the minimum GPU clock - for best performance, this should be near the maximum
# RX6000 series power management *sucks*
echo 's 0 {{ gpu_clock_min }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# set the maximum GPU clock
echo 's 1 {{ gpu_clock_max }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# set the GPU *memory* clock
# normally this would appear disregarded, memory clocked at the minimum allowed by the overdrive (OD) range
# it follows the core clock; if both 0/1 profiles for _it_ are high enough, the memory will follow
echo 'm 1 {{ gpumem_clock_static }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

{% if gpu_mv_offset is defined %}
# offset GPU voltage {{ gpu_mv_offset }}mV
echo 'vo {{ gpu_mv_offset }}' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage
{% endif %}

# commit the changes
echo 'c' | tee /sys/class/drm/"${CARD}"/device/pp_od_clk_voltage

# force GPU core and memory into highest clocks (fix flickering and poor power management)
# set manual control mode
# allows control via 'pp_dpm_mclk', 'pp_dpm_sclk', 'pp_dpm_pcie', 'pp_dpm_fclk', and 'pp_power_profile_mode' files
echo 'manual' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level

# adjust power limit using multiplier against board capability
if [[ -n "${POWER_CAP}" ]]; then
  POWER_LIM_OC=$(/usr/bin/awk -v m="$POWER_CAP" -v n="{{ gpu_power_multi.overclock }}" 'BEGIN {printf "%.0f", (m*n)}')
  echo "$POWER_LIM_OC" | tee "${HWMON_DIR}/power1_cap"
fi

# pp_dpm_*clk settings are unintuitive, giving profiles that may be used
echo '1' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_sclk
echo '3' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_mclk
echo '2' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_fclk
echo '2' | tee /sys/class/drm/"${CARD}"/device/pp_dpm_socclk

# extract the VR power profile ID number
PROF_VR_NUM=$(/usr/bin/awk '$0 ~ /VR.*:/ {print $1}' <<< "$PROFILE_MODES")

# force 'overclocked' profile to 'VR' power/clock heuristics
# latency/frame timing seemed favorable with relatively-close minimum clocks
echo "${PROF_VR_NUM}" | tee /sys/class/drm/"${CARD}"/device/pp_power_profile_mode

# note 4/8/2023: instead of 'manual'... try dealing with broken power management, force clocks to high
# ref: https://gitlab.freedesktop.org/drm/amd/-/issues/1500
# followup: doesn't work that well in practice, still flaky on clocks/frame times
#echo 'high' | tee /sys/class/drm/"${CARD}"/device/power_dpm_force_performance_level
{% endif %}