utils: make funcs require card, offer dict of discovery
This commit is contained in:
parent
016cb6844e
commit
8d633cf391
1 changed file with 145 additions and 114 deletions
|
@ -8,85 +8,64 @@ Variables:
|
||||||
- hwmon_dir: the `hwmon` interface (dir) that provides stats for this card
|
- hwmon_dir: the `hwmon` interface (dir) that provides stats for this card
|
||||||
- SRC_FILES: dictionary of the known stats from the items in `hwmon_dir`
|
- SRC_FILES: dictionary of the known stats from the items in `hwmon_dir`
|
||||||
- TEMP_FILES: dictionary of the *discovered* temperature nodes / stat files
|
- TEMP_FILES: dictionary of the *discovered* temperature nodes / stat files
|
||||||
- POWER_DOMAINS: tuple of supported power domains: `average`, `limit`, `cap`, and `default`
|
|
||||||
- CLOCK_DOMAINS: tuple of supported clock domains: `core`, `memory`
|
- CLOCK_DOMAINS: tuple of supported clock domains: `core`, `memory`
|
||||||
"""
|
"""
|
||||||
# disable superfluous linting
|
# disable superfluous linting
|
||||||
# pylint: disable=line-too-long
|
# pylint: disable=line-too-long
|
||||||
from os import path
|
from os import path
|
||||||
import glob
|
import glob
|
||||||
from typing import Tuple, Optional, Union
|
from typing import Optional, Union
|
||||||
from humanfriendly import format_size
|
from humanfriendly import format_size
|
||||||
|
|
||||||
|
|
||||||
def find_card() -> Optional[Tuple[Optional[str], Optional[str]]]:
|
def find_cards() -> dict:
|
||||||
"""Searches contents of /sys/class/drm/card*/device/hwmon/hwmon*/name
|
"""Searches contents of `/sys/class/drm/card*/device/hwmon/hwmon*/name`
|
||||||
|
|
||||||
... looking for 'amdgpu' to find a card to monitor
|
Reads 'hwmon' names looking for 'amdgpu' to find cards to monitor.
|
||||||
|
|
||||||
If no AMD GPU found, this will be: (None, None)
|
If device(s) found, returns a dictionary of cards with their hwmon directories.
|
||||||
|
|
||||||
|
If *none* found, this will be an empty dict.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: ('cardN', '/hwmon/directory/with/stat/files')
|
dict: `{'cardN': '/hwmon/directory/with/stat/files', 'cardY': '/other/hwmon/directory/for/cardY'}`
|
||||||
"""
|
"""
|
||||||
_card = None
|
cards = {}
|
||||||
_hwmon_dir = None
|
card_glob_pattern = '/sys/class/drm/card*/device/hwmon/hwmon*/name'
|
||||||
hwmon_names_glob = '/sys/class/drm/card*/device/hwmon/hwmon*/name'
|
hwmon_names = glob.glob(card_glob_pattern)
|
||||||
hwmon_names = glob.glob(hwmon_names_glob)
|
|
||||||
for hwmon_name_file in hwmon_names:
|
for hwmon_name_file in hwmon_names:
|
||||||
with open(hwmon_name_file, "r", encoding="utf-8") as _f:
|
with open(hwmon_name_file, "r", encoding="utf-8") as _f:
|
||||||
if _f.read().strip() == 'amdgpu':
|
if _f.read().strip() == 'amdgpu':
|
||||||
# found an amdgpu
|
# found an amdgpu
|
||||||
# note: if multiple are found, last will be used/watched
|
|
||||||
# will be configurable in the future, may prompt
|
|
||||||
_card = hwmon_name_file.split('/')[4]
|
_card = hwmon_name_file.split('/')[4]
|
||||||
_hwmon_dir = path.dirname(hwmon_name_file)
|
_hwmon_dir = path.dirname(hwmon_name_file)
|
||||||
return _card, _hwmon_dir
|
cards[_card] = _hwmon_dir
|
||||||
|
return cards
|
||||||
|
|
||||||
|
|
||||||
# base vars: card identifier, hwmon directory for stats, then the stat dicts
|
# discover all available AMD GPUs
|
||||||
CARD, hwmon_dir = find_card()
|
AMDGPU_CARDS = find_cards()
|
||||||
if CARD is not None:
|
# supported clock domains by 'get_clock' func
|
||||||
card_dir = path.join("/sys/class/drm/", CARD) # eg: /sys/class/drm/card0/
|
# each domain is mapped by 'get_clock' to a hwmon frequency file (core: freq1_input, memory: freq2_input)
|
||||||
|
CLOCK_DOMAINS = ('core', 'memory')
|
||||||
# dictionary of known source files
|
# defined outside/globally for efficiency -- it's called a lot in the TUI
|
||||||
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
|
|
||||||
SRC_FILES = {'pwr_limit': path.join(hwmon_dir, "power1_cap"),
|
|
||||||
'pwr_average': path.join(hwmon_dir, "power1_average"),
|
|
||||||
'pwr_cap': path.join(hwmon_dir, "power1_cap_max"),
|
|
||||||
'pwr_default': path.join(hwmon_dir, "power1_cap_default"),
|
|
||||||
'core_clock': path.join(hwmon_dir, "freq1_input"),
|
|
||||||
'core_voltage': path.join(hwmon_dir, "in0_input"),
|
|
||||||
'memory_clock': path.join(hwmon_dir, "freq2_input"),
|
|
||||||
'busy_pct': path.join(card_dir, "device/gpu_busy_percent"),
|
|
||||||
'temp_c': path.join(hwmon_dir, "temp1_input"),
|
|
||||||
'fan_rpm': path.join(hwmon_dir, "fan1_input"),
|
|
||||||
'fan_rpm_target': path.join(hwmon_dir, "fan1_target"),
|
|
||||||
}
|
|
||||||
|
|
||||||
# determine temperature nodes, construct a dict to store them
|
|
||||||
# interface will iterate over these, creating labels as needed
|
|
||||||
TEMP_FILES = {}
|
|
||||||
temp_node_labels = glob.glob(path.join(hwmon_dir, "temp*_label"))
|
|
||||||
for temp_node_label_file in temp_node_labels:
|
|
||||||
# determine the base node id, eg: temp1
|
|
||||||
# construct the path to the file that will label it. ie: edge/junction
|
|
||||||
temp_node_id = path.basename(temp_node_label_file).split('_')[0]
|
|
||||||
temp_node_value_file = path.join(hwmon_dir, f"{temp_node_id}_input")
|
|
||||||
with open(temp_node_label_file, 'r', encoding='utf-8') as _node:
|
|
||||||
temp_node_name = _node.read().strip()
|
|
||||||
# add the node name/type and the corresponding temp file to the dict
|
|
||||||
TEMP_FILES[temp_node_name] = temp_node_value_file
|
|
||||||
|
|
||||||
|
|
||||||
def read_stat(file: str) -> str:
|
def read_stat(file: str, stat_type: Optional[str] = None) -> str:
|
||||||
"""Read statistic `file`, return the stripped contents
|
"""Read statistic `file`, return the stripped contents
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file (str): The statistic file to read/return
|
||||||
|
|
||||||
|
stat_type (str): Optional type, if specified - can convert data.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Statistics from `file`"""
|
str: Statistics from `file`. If `stat_type='power'`, converts microwatts to Watts and returns an int"""
|
||||||
with open(file, "r", encoding="utf-8") as _fh:
|
with open(file, "r", encoding="utf-8") as _fh:
|
||||||
data = _fh.read()
|
data = _fh.read().strip()
|
||||||
return data.strip()
|
if stat_type == 'power':
|
||||||
|
data = int(int(data) / 1000000)
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
def format_frequency(frequency_hz: int) -> str:
|
def format_frequency(frequency_hz: int) -> str:
|
||||||
|
@ -103,47 +82,35 @@ def format_frequency(frequency_hz: int) -> str:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_power_stats() -> dict:
|
def get_power_stats(card: str) -> dict:
|
||||||
"""
|
"""
|
||||||
|
Args:
|
||||||
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary of current GPU *power* related statistics.
|
dict: A dictionary of current GPU *power* related statistics.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
`{'limit': int, 'average': int, 'capability': int, 'default': int}`
|
`{'limit': int, 'average': int, 'capability': int, 'default': int}`
|
||||||
"""
|
"""
|
||||||
return {"limit": get_gpu_power('limit'),
|
if card in AMDGPU_CARDS:
|
||||||
"average": get_gpu_power('average'),
|
hwmon_dir = AMDGPU_CARDS[card]
|
||||||
"capability": get_gpu_power('cap'),
|
else:
|
||||||
"default": get_gpu_power('default')}
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
|
|
||||||
|
return {"limit": read_stat(path.join(hwmon_dir, "power1_cap"), stat_type='power'),
|
||||||
|
"average": read_stat(path.join(hwmon_dir, "power1_average"), stat_type='power'),
|
||||||
|
"capability": read_stat(path.join(hwmon_dir, "power1_cap_max"), stat_type='power'),
|
||||||
|
"default": read_stat(path.join(hwmon_dir, "power1_cap_default"), stat_type='power')}
|
||||||
|
|
||||||
|
|
||||||
# constant; supported power domains by 'get_gpu_power' func
|
def get_core_stats(card: str) -> dict:
|
||||||
# is concatenated with 'pwr_' to index SRC_FILES for the relevant data file
|
|
||||||
POWER_DOMAINS = ('limit', 'average', 'cap', 'default')
|
|
||||||
# defined outside/globally for efficiency -- it's called a lot in the TUI
|
|
||||||
|
|
||||||
|
|
||||||
def get_gpu_power(domain: str) -> int:
|
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
domain (str): The GPU domain of interest regarding power
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
Must be one of POWER_DOMAINS:
|
|
||||||
- limit: the effective limit placed on the card
|
|
||||||
- default: the default limit
|
|
||||||
- average: the average consumption
|
|
||||||
- cap: the board capability
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int: The requested GPU power statistic by domain, in Watts
|
|
||||||
"""
|
|
||||||
if domain not in POWER_DOMAINS:
|
|
||||||
raise ValueError(f"Invalid power domain: '{domain}'. Must be one of: {POWER_DOMAINS}")
|
|
||||||
return int(int(read_stat(SRC_FILES['pwr_' + domain])) / 1000000)
|
|
||||||
|
|
||||||
|
|
||||||
def get_core_stats() -> dict:
|
|
||||||
"""
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary of current GPU *core/memory* related statistics.
|
dict: A dictionary of current GPU *core/memory* related statistics.
|
||||||
|
|
||||||
|
@ -152,21 +119,22 @@ def get_core_stats() -> dict:
|
||||||
Example:
|
Example:
|
||||||
`{'sclk': int, 'mclk': int, 'voltage': float, 'util_pct': int}`
|
`{'sclk': int, 'mclk': int, 'voltage': float, 'util_pct': int}`
|
||||||
"""
|
"""
|
||||||
return {"sclk": get_clock('core'),
|
# verify card -- is it AMD, do we know the hwmon directory?
|
||||||
"mclk": get_clock('memory'),
|
if card in AMDGPU_CARDS:
|
||||||
"voltage": get_voltage(),
|
return {"sclk": get_clock(card, 'core'),
|
||||||
"util_pct": get_gpu_usage()}
|
"mclk": get_clock(card, 'memory'),
|
||||||
|
"voltage": get_voltage(card),
|
||||||
|
"util_pct": get_gpu_usage(card)}
|
||||||
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
|
|
||||||
|
|
||||||
# constant; supported clock domains by 'get_clock' func
|
def get_clock(card: str, domain: str, format_freq: bool = False) -> Union[int, str]:
|
||||||
# is concatenated with 'clock_' to index SRC_FILES for the relevant data file
|
|
||||||
CLOCK_DOMAINS = ('core', 'memory')
|
|
||||||
# defined outside/globally for efficiency -- it's called a lot in the TUI
|
|
||||||
|
|
||||||
|
|
||||||
def get_clock(domain: str, format_freq: bool = False) -> Union[int, str]:
|
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
domain (str): The GPU domain of interest regarding clock speed.
|
domain (str): The GPU domain of interest regarding clock speed.
|
||||||
Must be one of CLOCK_DOMAINS
|
Must be one of CLOCK_DOMAINS
|
||||||
|
|
||||||
|
@ -178,59 +146,102 @@ def get_clock(domain: str, format_freq: bool = False) -> Union[int, str]:
|
||||||
If format_freq is True, a formatted string with Hz/MHz/GHz
|
If format_freq is True, a formatted string with Hz/MHz/GHz
|
||||||
will be returned instead of an int
|
will be returned instead of an int
|
||||||
"""
|
"""
|
||||||
|
# verify card -- is it AMD, do we know the hwmon directory?
|
||||||
|
if card in AMDGPU_CARDS:
|
||||||
|
hwmon_dir = AMDGPU_CARDS[card]
|
||||||
|
else:
|
||||||
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
if domain not in CLOCK_DOMAINS:
|
if domain not in CLOCK_DOMAINS:
|
||||||
raise ValueError(f"Invalid clock domain: '{domain}'. Must be one of: {CLOCK_DOMAINS}")
|
raise ValueError(f"Invalid clock domain: '{domain}'. Must be one of: {CLOCK_DOMAINS}")
|
||||||
|
# set the clock file based on requested domain
|
||||||
|
if domain == 'core':
|
||||||
|
clock_file = path.join(hwmon_dir, "freq1_input")
|
||||||
|
elif domain == 'memory':
|
||||||
|
clock_file = path.join(hwmon_dir, "freq2_input")
|
||||||
|
# handle output processing
|
||||||
if format_freq:
|
if format_freq:
|
||||||
return format_frequency(read_stat(SRC_FILES[domain + '_clock']))
|
return format_frequency(int(read_stat(clock_file)))
|
||||||
return int(read_stat(SRC_FILES[domain + '_clock']))
|
return int(read_stat(clock_file))
|
||||||
|
|
||||||
|
|
||||||
def get_voltage() -> float:
|
def get_voltage(card: str) -> float:
|
||||||
"""
|
"""
|
||||||
|
Args:
|
||||||
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: The current GPU core voltage
|
float: The current GPU core voltage
|
||||||
"""
|
"""
|
||||||
return round(int(read_stat(SRC_FILES['core_voltage'])) / 1000.0, 2)
|
# verify card -- is it AMD, do we know the hwmon directory?
|
||||||
|
if card in AMDGPU_CARDS:
|
||||||
|
hwmon_dir = AMDGPU_CARDS[card]
|
||||||
|
else:
|
||||||
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
|
return round(int(read_stat(path.join(hwmon_dir, "in0_input"))) / 1000.0, 2)
|
||||||
|
|
||||||
|
|
||||||
def get_fan_stats() -> dict:
|
def get_fan_rpm(card: str) -> int:
|
||||||
"""
|
"""
|
||||||
Returns:
|
Args:
|
||||||
dict: A dictionary of current GPU *fan* related statistics.
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
Example:
|
|
||||||
`{'fan_rpm': int, 'fan_rpm_target': int}`
|
|
||||||
"""
|
|
||||||
return {"fan_rpm": get_fan_rpm(),
|
|
||||||
"fan_rpm_target": get_fan_target()}
|
|
||||||
|
|
||||||
|
|
||||||
def get_fan_rpm() -> int:
|
|
||||||
"""
|
|
||||||
Returns:
|
Returns:
|
||||||
int: The current fan RPM
|
int: The current fan RPM
|
||||||
"""
|
"""
|
||||||
return int(read_stat(SRC_FILES['fan_rpm']))
|
# verify card -- is it AMD, do we know the hwmon directory?
|
||||||
|
if card in AMDGPU_CARDS:
|
||||||
|
hwmon_dir = AMDGPU_CARDS[card]
|
||||||
|
else:
|
||||||
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
|
return int(read_stat(path.join(hwmon_dir, "fan1_input")))
|
||||||
|
|
||||||
|
|
||||||
def get_fan_target() -> int:
|
def get_fan_target(card: str) -> int:
|
||||||
"""
|
"""
|
||||||
|
Args:
|
||||||
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
int: The target fan RPM
|
int: The target fan RPM
|
||||||
"""
|
"""
|
||||||
return int(read_stat(SRC_FILES['fan_rpm_target']))
|
# verify card -- is it AMD, do we know the hwmon directory?
|
||||||
|
if card in AMDGPU_CARDS:
|
||||||
|
hwmon_dir = AMDGPU_CARDS[card]
|
||||||
|
else:
|
||||||
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
|
return int(read_stat(path.join(hwmon_dir, "fan1_target")))
|
||||||
|
|
||||||
|
|
||||||
def get_gpu_usage() -> int:
|
def get_gpu_usage(card: str) -> int:
|
||||||
"""
|
"""
|
||||||
|
Args:
|
||||||
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
int: The current GPU usage/utilization as a percentage
|
int: The current GPU usage/utilization as a percentage
|
||||||
"""
|
"""
|
||||||
return int(read_stat(SRC_FILES['busy_pct']))
|
if card in AMDGPU_CARDS:
|
||||||
|
stat_file = path.join("/sys/class/drm/", card, "device/gpu_busy_percent")
|
||||||
|
else:
|
||||||
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
|
return int(read_stat(stat_file))
|
||||||
|
|
||||||
|
|
||||||
def get_temp_stats() -> dict:
|
def get_temp_stats(card: str) -> dict:
|
||||||
"""
|
"""
|
||||||
|
Args:
|
||||||
|
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary of current GPU *temperature* related statistics.
|
dict: A dictionary of current GPU *temperature* related statistics.
|
||||||
|
|
||||||
|
@ -243,8 +254,28 @@ def get_temp_stats() -> dict:
|
||||||
|
|
||||||
Returned values are converted to C, as integers for simple comparison
|
Returned values are converted to C, as integers for simple comparison
|
||||||
"""
|
"""
|
||||||
|
if card in AMDGPU_CARDS:
|
||||||
|
hwmon_dir = AMDGPU_CARDS[card]
|
||||||
|
else:
|
||||||
|
if len(AMDGPU_CARDS) > 0:
|
||||||
|
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
|
||||||
|
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
|
||||||
|
# determine temperature nodes, construct a dict to store them
|
||||||
|
# interface will iterate over these, creating labels as needed
|
||||||
|
temp_files = {}
|
||||||
|
temp_node_labels = glob.glob(path.join(hwmon_dir, "temp*_label"))
|
||||||
|
for temp_node_label_file in temp_node_labels:
|
||||||
|
# determine the base node id, eg: temp1
|
||||||
|
# construct the path to the file that will label it. ie: edge/junction
|
||||||
|
temp_node_id = path.basename(temp_node_label_file).split('_')[0]
|
||||||
|
temp_node_value_file = path.join(hwmon_dir, f"{temp_node_id}_input")
|
||||||
|
with open(temp_node_label_file, 'r', encoding='utf-8') as _node:
|
||||||
|
temp_node_name = _node.read().strip()
|
||||||
|
# add the node name/type and the corresponding temp file to the dict
|
||||||
|
temp_files[temp_node_name] = temp_node_value_file
|
||||||
|
|
||||||
temp_update = {}
|
temp_update = {}
|
||||||
for temp_node, temp_file in TEMP_FILES.items():
|
for temp_node, temp_file in temp_files.items():
|
||||||
# iterate through the discovered temperature nodes
|
# iterate through the discovered temperature nodes
|
||||||
# ... updating the dictionary with new stats
|
# ... updating the dictionary with new stats
|
||||||
_temperature = int(int(read_stat(temp_file)) // 1000)
|
_temperature = int(int(read_stat(temp_file)) // 1000)
|
||||||
|
|
Reference in a new issue