utils: make funcs require card, offer dict of discovery
This commit is contained in:
parent
016cb6844e
commit
8d633cf391
1 changed files with 145 additions and 114 deletions
|
@ -8,85 +8,64 @@ Variables:
|
|||
- hwmon_dir: the `hwmon` interface (dir) that provides stats for this card
|
||||
- SRC_FILES: dictionary of the known stats from the items in `hwmon_dir`
|
||||
- TEMP_FILES: dictionary of the *discovered* temperature nodes / stat files
|
||||
- POWER_DOMAINS: tuple of supported power domains: `average`, `limit`, `cap`, and `default`
|
||||
- CLOCK_DOMAINS: tuple of supported clock domains: `core`, `memory`
|
||||
"""
|
||||
# disable superfluous linting
|
||||
# pylint: disable=line-too-long
|
||||
from os import path
|
||||
import glob
|
||||
from typing import Tuple, Optional, Union
|
||||
from typing import Optional, Union
|
||||
from humanfriendly import format_size
|
||||
|
||||
|
||||
def find_card() -> Optional[Tuple[Optional[str], Optional[str]]]:
|
||||
"""Searches contents of /sys/class/drm/card*/device/hwmon/hwmon*/name
|
||||
def find_cards() -> dict:
|
||||
"""Searches contents of `/sys/class/drm/card*/device/hwmon/hwmon*/name`
|
||||
|
||||
... looking for 'amdgpu' to find a card to monitor
|
||||
Reads 'hwmon' names looking for 'amdgpu' to find cards to monitor.
|
||||
|
||||
If no AMD GPU found, this will be: (None, None)
|
||||
If device(s) found, returns a dictionary of cards with their hwmon directories.
|
||||
|
||||
If *none* found, this will be an empty dict.
|
||||
|
||||
Returns:
|
||||
tuple: ('cardN', '/hwmon/directory/with/stat/files')
|
||||
dict: `{'cardN': '/hwmon/directory/with/stat/files', 'cardY': '/other/hwmon/directory/for/cardY'}`
|
||||
"""
|
||||
_card = None
|
||||
_hwmon_dir = None
|
||||
hwmon_names_glob = '/sys/class/drm/card*/device/hwmon/hwmon*/name'
|
||||
hwmon_names = glob.glob(hwmon_names_glob)
|
||||
cards = {}
|
||||
card_glob_pattern = '/sys/class/drm/card*/device/hwmon/hwmon*/name'
|
||||
hwmon_names = glob.glob(card_glob_pattern)
|
||||
for hwmon_name_file in hwmon_names:
|
||||
with open(hwmon_name_file, "r", encoding="utf-8") as _f:
|
||||
if _f.read().strip() == 'amdgpu':
|
||||
# found an amdgpu
|
||||
# note: if multiple are found, last will be used/watched
|
||||
# will be configurable in the future, may prompt
|
||||
_card = hwmon_name_file.split('/')[4]
|
||||
_hwmon_dir = path.dirname(hwmon_name_file)
|
||||
return _card, _hwmon_dir
|
||||
cards[_card] = _hwmon_dir
|
||||
return cards
|
||||
|
||||
|
||||
# base vars: card identifier, hwmon directory for stats, then the stat dicts
|
||||
CARD, hwmon_dir = find_card()
|
||||
if CARD is not None:
|
||||
card_dir = path.join("/sys/class/drm/", CARD) # eg: /sys/class/drm/card0/
|
||||
|
||||
# dictionary of known source files
|
||||
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
|
||||
SRC_FILES = {'pwr_limit': path.join(hwmon_dir, "power1_cap"),
|
||||
'pwr_average': path.join(hwmon_dir, "power1_average"),
|
||||
'pwr_cap': path.join(hwmon_dir, "power1_cap_max"),
|
||||
'pwr_default': path.join(hwmon_dir, "power1_cap_default"),
|
||||
'core_clock': path.join(hwmon_dir, "freq1_input"),
|
||||
'core_voltage': path.join(hwmon_dir, "in0_input"),
|
||||
'memory_clock': path.join(hwmon_dir, "freq2_input"),
|
||||
'busy_pct': path.join(card_dir, "device/gpu_busy_percent"),
|
||||
'temp_c': path.join(hwmon_dir, "temp1_input"),
|
||||
'fan_rpm': path.join(hwmon_dir, "fan1_input"),
|
||||
'fan_rpm_target': path.join(hwmon_dir, "fan1_target"),
|
||||
}
|
||||
|
||||
# determine temperature nodes, construct a dict to store them
|
||||
# interface will iterate over these, creating labels as needed
|
||||
TEMP_FILES = {}
|
||||
temp_node_labels = glob.glob(path.join(hwmon_dir, "temp*_label"))
|
||||
for temp_node_label_file in temp_node_labels:
|
||||
# determine the base node id, eg: temp1
|
||||
# construct the path to its value file; the label file names the node, e.g. edge/junction
|
||||
temp_node_id = path.basename(temp_node_label_file).split('_')[0]
|
||||
temp_node_value_file = path.join(hwmon_dir, f"{temp_node_id}_input")
|
||||
with open(temp_node_label_file, 'r', encoding='utf-8') as _node:
|
||||
temp_node_name = _node.read().strip()
|
||||
# add the node name/type and the corresponding temp file to the dict
|
||||
TEMP_FILES[temp_node_name] = temp_node_value_file
|
||||
# discover all available AMD GPUs
|
||||
AMDGPU_CARDS = find_cards()
|
||||
# supported clock domains by 'get_clock' func
|
||||
# is concatenated with 'clock_' to index SRC_FILES for the relevant data file
|
||||
CLOCK_DOMAINS = ('core', 'memory')
|
||||
# defined outside/globally for efficiency -- it's called a lot in the TUI
|
||||
|
||||
|
||||
def read_stat(file: str) -> str:
|
||||
def read_stat(file: str, stat_type: Optional[str] = None) -> str:
|
||||
"""Read statistic `file`, return the stripped contents
|
||||
|
||||
Args:
|
||||
file (str): The statistic file to read/return
|
||||
|
||||
stat_type (str): Optional stat type; if `'power'`, the raw value is converted to whole Watts.
|
||||
|
||||
Returns:
|
||||
str: Statistics from `file`"""
|
||||
str: Statistics from `file`. If `stat_type='power'`, converts microwatts to whole Watts and returns an int"""
|
||||
with open(file, "r", encoding="utf-8") as _fh:
|
||||
data = _fh.read()
|
||||
return data.strip()
|
||||
data = _fh.read().strip()
|
||||
if stat_type == 'power':
|
||||
data = int(int(data) / 1000000)
|
||||
return data
|
||||
|
||||
|
||||
def format_frequency(frequency_hz: int) -> str:
|
||||
|
@ -103,47 +82,35 @@ def format_frequency(frequency_hz: int) -> str:
|
|||
)
|
||||
|
||||
|
||||
def get_power_stats() -> dict:
|
||||
def get_power_stats(card: str) -> dict:
|
||||
"""
|
||||
Args:
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
Returns:
|
||||
dict: A dictionary of current GPU *power* related statistics.
|
||||
|
||||
Example:
|
||||
`{'limit': int, 'average': int, 'capability': int, 'default': int}`
|
||||
"""
|
||||
return {"limit": get_gpu_power('limit'),
|
||||
"average": get_gpu_power('average'),
|
||||
"capability": get_gpu_power('cap'),
|
||||
"default": get_gpu_power('default')}
|
||||
if card in AMDGPU_CARDS:
|
||||
hwmon_dir = AMDGPU_CARDS[card]
|
||||
else:
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
|
||||
return {"limit": read_stat(path.join(hwmon_dir, "power1_cap"), stat_type='power'),
|
||||
"average": read_stat(path.join(hwmon_dir, "power1_average"), stat_type='power'),
|
||||
"capability": read_stat(path.join(hwmon_dir, "power1_cap_max"), stat_type='power'),
|
||||
"default": read_stat(path.join(hwmon_dir, "power1_cap_default"), stat_type='power')}
|
||||
|
||||
|
||||
# constant; supported power domains by 'get_gpu_power' func
|
||||
# is concatenated with 'pwr_' to index SRC_FILES for the relevant data file
|
||||
POWER_DOMAINS = ('limit', 'average', 'cap', 'default')
|
||||
# defined outside/globally for efficiency -- it's called a lot in the TUI
|
||||
|
||||
|
||||
def get_gpu_power(domain: str) -> int:
|
||||
def get_core_stats(card: str) -> dict:
|
||||
"""
|
||||
Args:
|
||||
domain (str): The GPU domain of interest regarding power
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
Must be one of POWER_DOMAINS:
|
||||
- limit: the effective limit placed on the card
|
||||
- default: the default limit
|
||||
- average: the average consumption
|
||||
- cap: the board capability
|
||||
|
||||
Returns:
|
||||
int: The requested GPU power statistic by domain, in Watts
|
||||
"""
|
||||
if domain not in POWER_DOMAINS:
|
||||
raise ValueError(f"Invalid power domain: '{domain}'. Must be one of: {POWER_DOMAINS}")
|
||||
return int(int(read_stat(SRC_FILES['pwr_' + domain])) / 1000000)
|
||||
|
||||
|
||||
def get_core_stats() -> dict:
|
||||
"""
|
||||
Returns:
|
||||
dict: A dictionary of current GPU *core/memory* related statistics.
|
||||
|
||||
|
@ -152,21 +119,22 @@ def get_core_stats() -> dict:
|
|||
Example:
|
||||
`{'sclk': int, 'mclk': int, 'voltage': float, 'util_pct': int}`
|
||||
"""
|
||||
return {"sclk": get_clock('core'),
|
||||
"mclk": get_clock('memory'),
|
||||
"voltage": get_voltage(),
|
||||
"util_pct": get_gpu_usage()}
|
||||
# verify card -- is it AMD, do we know the hwmon directory?
|
||||
if card in AMDGPU_CARDS:
|
||||
return {"sclk": get_clock(card, 'core'),
|
||||
"mclk": get_clock(card, 'memory'),
|
||||
"voltage": get_voltage(card),
|
||||
"util_pct": get_gpu_usage(card)}
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
|
||||
|
||||
# constant; supported clock domains by 'get_clock' func
|
||||
# is concatenated with 'clock_' to index SRC_FILES for the relevant data file
|
||||
CLOCK_DOMAINS = ('core', 'memory')
|
||||
# defined outside/globally for efficiency -- it's called a lot in the TUI
|
||||
|
||||
|
||||
def get_clock(domain: str, format_freq: bool = False) -> Union[int, str]:
|
||||
def get_clock(card: str, domain: str, format_freq: bool = False) -> Union[int, str]:
|
||||
"""
|
||||
Args:
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
domain (str): The GPU domain of interest regarding clock speed.
|
||||
Must be one of CLOCK_DOMAINS
|
||||
|
||||
|
@ -178,59 +146,102 @@ def get_clock(domain: str, format_freq: bool = False) -> Union[int, str]:
|
|||
If format_freq is True, a formatted string with Hz/MHz/GHz
|
||||
will be returned instead of an int
|
||||
"""
|
||||
# verify card -- is it AMD, do we know the hwmon directory?
|
||||
if card in AMDGPU_CARDS:
|
||||
hwmon_dir = AMDGPU_CARDS[card]
|
||||
else:
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
if domain not in CLOCK_DOMAINS:
|
||||
raise ValueError(f"Invalid clock domain: '{domain}'. Must be one of: {CLOCK_DOMAINS}")
|
||||
# set the clock file based on requested domain
|
||||
if domain == 'core':
|
||||
clock_file = path.join(hwmon_dir, "freq1_input")
|
||||
elif domain == 'memory':
|
||||
clock_file = path.join(hwmon_dir, "freq2_input")
|
||||
# handle output processing
|
||||
if format_freq:
|
||||
return format_frequency(read_stat(SRC_FILES[domain + '_clock']))
|
||||
return int(read_stat(SRC_FILES[domain + '_clock']))
|
||||
return format_frequency(int(read_stat(clock_file)))
|
||||
return int(read_stat(clock_file))
|
||||
|
||||
|
||||
def get_voltage() -> float:
|
||||
def get_voltage(card: str) -> float:
|
||||
"""
|
||||
Args:
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
Returns:
|
||||
float: The current GPU core voltage
|
||||
"""
|
||||
return round(int(read_stat(SRC_FILES['core_voltage'])) / 1000.0, 2)
|
||||
# verify card -- is it AMD, do we know the hwmon directory?
|
||||
if card in AMDGPU_CARDS:
|
||||
hwmon_dir = AMDGPU_CARDS[card]
|
||||
else:
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
return round(int(read_stat(path.join(hwmon_dir, "in0_input"))) / 1000.0, 2)
|
||||
|
||||
|
||||
def get_fan_stats() -> dict:
|
||||
def get_fan_rpm(card: str) -> int:
|
||||
"""
|
||||
Returns:
|
||||
dict: A dictionary of current GPU *fan* related statistics.
|
||||
Args:
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
Example:
|
||||
`{'fan_rpm': int, 'fan_rpm_target': int}`
|
||||
"""
|
||||
return {"fan_rpm": get_fan_rpm(),
|
||||
"fan_rpm_target": get_fan_target()}
|
||||
|
||||
|
||||
def get_fan_rpm() -> int:
|
||||
"""
|
||||
Returns:
|
||||
int: The current fan RPM
|
||||
"""
|
||||
return int(read_stat(SRC_FILES['fan_rpm']))
|
||||
# verify card -- is it AMD, do we know the hwmon directory?
|
||||
if card in AMDGPU_CARDS:
|
||||
hwmon_dir = AMDGPU_CARDS[card]
|
||||
else:
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
return int(read_stat(path.join(hwmon_dir, "fan1_input")))
|
||||
|
||||
|
||||
def get_fan_target() -> int:
|
||||
def get_fan_target(card: str) -> int:
|
||||
"""
|
||||
Args:
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
Returns:
|
||||
int: The current fan RPM target
|
||||
"""
|
||||
return int(read_stat(SRC_FILES['fan_rpm_target']))
|
||||
# verify card -- is it AMD, do we know the hwmon directory?
|
||||
if card in AMDGPU_CARDS:
|
||||
hwmon_dir = AMDGPU_CARDS[card]
|
||||
else:
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
return int(read_stat(path.join(hwmon_dir, "fan1_target")))
|
||||
|
||||
|
||||
def get_gpu_usage() -> int:
|
||||
def get_gpu_usage(card: str) -> int:
|
||||
"""
|
||||
Args:
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
Returns:
|
||||
int: The current GPU usage/utilization as a percentage
|
||||
"""
|
||||
return int(read_stat(SRC_FILES['busy_pct']))
|
||||
if card in AMDGPU_CARDS:
|
||||
stat_file = path.join("/sys/class/drm/", card, "device/gpu_busy_percent")
|
||||
else:
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
return int(read_stat(stat_file))
|
||||
|
||||
|
||||
def get_temp_stats() -> dict:
|
||||
def get_temp_stats(card: str) -> dict:
|
||||
"""
|
||||
Args:
|
||||
card (str): Card identifier from `/dev/dri/`, e.g. `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||
|
||||
Returns:
|
||||
dict: A dictionary of current GPU *temperature* related statistics.
|
||||
|
||||
|
@ -243,8 +254,28 @@ def get_temp_stats() -> dict:
|
|||
|
||||
Returned values are converted to C, as integers for simple comparison
|
||||
"""
|
||||
if card in AMDGPU_CARDS:
|
||||
hwmon_dir = AMDGPU_CARDS[card]
|
||||
else:
|
||||
if len(AMDGPU_CARDS) > 0:
|
||||
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||
# determine temperature nodes, construct a dict to store them
|
||||
# interface will iterate over these, creating labels as needed
|
||||
temp_files = {}
|
||||
temp_node_labels = glob.glob(path.join(hwmon_dir, "temp*_label"))
|
||||
for temp_node_label_file in temp_node_labels:
|
||||
# determine the base node id, eg: temp1
|
||||
# construct the path to its value file; the label file names the node, e.g. edge/junction
|
||||
temp_node_id = path.basename(temp_node_label_file).split('_')[0]
|
||||
temp_node_value_file = path.join(hwmon_dir, f"{temp_node_id}_input")
|
||||
with open(temp_node_label_file, 'r', encoding='utf-8') as _node:
|
||||
temp_node_name = _node.read().strip()
|
||||
# add the node name/type and the corresponding temp file to the dict
|
||||
temp_files[temp_node_name] = temp_node_value_file
|
||||
|
||||
temp_update = {}
|
||||
for temp_node, temp_file in TEMP_FILES.items():
|
||||
for temp_node, temp_file in temp_files.items():
|
||||
# iterate through the discovered temperature nodes
|
||||
# ... updating the dictionary with new stats
|
||||
_temperature = int(int(read_stat(temp_file)) // 1000)
|
||||
|
|
Reference in a new issue