Archived
1
1
Fork 0

Merge pull request #17 from joshlay/configurable_card

v0.1.9: configurable funcs (card)
This commit is contained in:
Josh Lay 2023-04-26 23:14:45 -05:00 committed by GitHub
commit fc5f4df45d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 226 additions and 169 deletions

View file

@ -4,11 +4,9 @@ A simple Python module/TUI _(using [Textual](https://textual.textualize.io/))_ t
![Screenshot of main screen](https://raw.githubusercontent.com/joshlay/amdgpu_stats/master/screens/main.png "Main screen") ![Screenshot of main screen](https://raw.githubusercontent.com/joshlay/amdgpu_stats/master/screens/main.png "Main screen")
![Screenshot of log screen](https://raw.githubusercontent.com/joshlay/amdgpu_stats/master/screens/logging.png "Logging screen")
The GPU and temperature nodes (`edge`/`junction`/etc.) are discovered automatically. The GPU and temperature nodes (`edge`/`junction`/etc.) are discovered automatically.
Statistics are not logged; only Dark/light mode toggles and the stat names / source files are. Please see [the module section](#module) or [the docs](https://amdgpu-stats.readthedocs.io/en/latest/) for information on usage as an `import` in other tooling.
Tested _only_ on `RX6000` series cards; APUs and more _may_ be supported. Please file an issue if finding incompatibility! Tested _only_ on `RX6000` series cards; APUs and more _may_ be supported. Please file an issue if finding incompatibility!
@ -35,18 +33,17 @@ Demonstration:
``` ```
In [1]: import amdgpu_stats.utils In [1]: import amdgpu_stats.utils
In [2]: print(amdgpu_stats.utils.get_core_stats()) In [2]: amdgpu_stats.utils.AMDGPU_CARDS
{'sclk': 0, 'mclk': 1000000000, 'voltage': 0.01, 'util_pct': 0} Out[2]: {'card0': '/sys/class/drm/card0/device/hwmon/hwmon9'}
In [3]: print(amdgpu_stats.utils.get_power_stats()) In [3]: amdgpu_stats.utils.get_core_stats('card0')
{'limit': 281, 'average': 35, 'capability': 323, 'default': 281} Out[3]: {'sclk': 640000000, 'mclk': 1000000000, 'voltage': 0.79, 'util_pct': 65}
In [4]: print(amdgpu_stats.utils.get_temp_stats()) In [4]: amdgpu_stats.utils.get_clock('card0', 'core', format_freq=True)
{'edge': 33, 'junction': 36, 'mem': 42} Out[4]: '659 MHz'
In [5]: print(amdgpu_stats.utils.get_fan_stats())
{'fan_rpm': 0, 'fan_rpm_target': 0}
``` ```
Feature requests [are encouraged](https://github.com/joshlay/amdgpu_stats/issues) :)
## Documentation ## Documentation
For more information on the module, see: For more information on the module, see:

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "amdgpu-stats" name = "amdgpu-stats"
version = "0.1.8" version = "0.1.9"
description = "A simple module/TUI (using Textual) that provides AMD GPU statistics" description = "A simple module/TUI (using Textual) that provides AMD GPU statistics"
authors = ["Josh Lay <pypi@jlay.io>"] authors = ["Josh Lay <pypi@jlay.io>"]
repository = "https://github.com/joshlay/amdgpu_stats" repository = "https://github.com/joshlay/amdgpu_stats"

View file

@ -23,7 +23,13 @@ from textual.reactive import reactive
from textual.screen import Screen from textual.screen import Screen
from textual.widgets import Header, Footer, Static, TextLog, Label from textual.widgets import Header, Footer, Static, TextLog, Label
from .utils import CARD, SRC_FILES, TEMP_FILES, format_frequency, get_core_stats, get_fan_stats, get_power_stats, get_temp_stats # pylint: disable=line-too-long from .utils import AMDGPU_CARDS, format_frequency, get_core_stats, get_fan_rpm, get_fan_target, get_power_stats, get_temp_stats # pylint: disable=line-too-long
# globals - card handling / choice for TUI
if len(AMDGPU_CARDS) > 0:
# default to showing stats for the first detected card
CARD = next(iter(AMDGPU_CARDS))
hwmon_dir = AMDGPU_CARDS[CARD]
class LogScreen(Screen): class LogScreen(Screen):
@ -79,17 +85,18 @@ class GPUStats(App):
yield Header() yield Header()
yield Container(GPUStatsWidget()) yield Container(GPUStatsWidget())
self.update_log("[bold green]App started, logging begin!") self.update_log("[bold green]App started, logging begin!")
self.update_log("[bold italic]Information sources:[/]") self.update_log(f"[bold italic]Information source:[/] {hwmon_dir}")
for metric, source in SRC_FILES.items(): # nice-to-have: account for not storing these in dicts, but resolved in funcs
self.update_log(f'[bold] {metric}:[/] {source}') # for metric, source in SRC_FILES.items():
for metric, source in TEMP_FILES.items(): # self.update_log(f'[bold] {metric}:[/] {source}')
self.update_log(f'[bold] {metric} temperature:[/] {source}') # for metric, source in TEMP_FILES.items():
# self.update_log(f'[bold] {metric} temperature:[/] {source}')
yield Footer() yield Footer()
def action_toggle_dark(self) -> None: def action_toggle_dark(self) -> None:
"""An action to toggle dark mode.""" """An action to toggle dark mode."""
self.dark = not self.dark self.dark = not self.dark
self.update_log(f"Dark side: [bold]{self.dark}") self.update_log(f"[bold]Dark side: [italic]{self.dark}")
def action_quit_app(self) -> None: def action_quit_app(self) -> None:
"""An action to quit the program""" """An action to quit the program"""
@ -114,28 +121,39 @@ class MiscDisplay(Static):
"""A widget to display misc. GPU stats.""" """A widget to display misc. GPU stats."""
# construct the misc. stats dict; appended by discovered temperature nodes # construct the misc. stats dict; appended by discovered temperature nodes
# used to make a 'reactive' object # used to make a 'reactive' object
fan_stats = reactive({"fan_rpm": 0, fan_rpm = reactive(0)
"fan_rpm_target": 0}) fan_rpm_target = reactive(0)
# do some dancing to craft the UI; initialize the reactive obj with data
# to get proper labels
initial_stats = get_temp_stats(CARD)
# dynamic object for temperature updates
temp_stats = reactive({}) temp_stats = reactive({})
# default to 'not composed', once labels are made - become true
# avoids a race condition between discovering temperature nodes/stats
# ... and making labels for them
composed = False
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.timer_misc = None self.timer_misc = None
self.temp_stats = get_temp_stats(CARD)
def compose(self) -> ComposeResult: def compose(self) -> ComposeResult:
yield Horizontal(Label("[underline]Temperatures"), yield Horizontal(Label("[underline]Temperatures"),
Label("", classes="statvalue")) Label("", classes="statvalue"))
for temp_node in TEMP_FILES: for temp_node in self.initial_stats:
# capitalize the first letter for display # capitalize the first letter for display
caption = temp_node[0].upper() + temp_node[1:] caption = temp_node[0].upper() + temp_node[1:]
yield Horizontal(Label(f' {caption}:',), yield Horizontal(Label(f' {caption}:',),
Label("", id="temp_" + temp_node, classes="statvalue")) Label("", id="temp_" + temp_node,
classes="statvalue"))
yield Horizontal(Label("[underline]Fan RPM"), yield Horizontal(Label("[underline]Fan RPM"),
Label("", classes="statvalue")) Label("", classes="statvalue"))
yield Horizontal(Label(" Current:",), yield Horizontal(Label(" Current:",),
Label("", id="fan_rpm", classes="statvalue")) Label("", id="fan_rpm", classes="statvalue"))
yield Horizontal(Label(" Target:",), yield Horizontal(Label(" Target:",),
Label("", id="fan_rpm_target", classes="statvalue")) Label("", id="fan_rpm_target", classes="statvalue"))
self.composed = True
def on_mount(self) -> None: def on_mount(self) -> None:
"""Event handler called when widget is added to the app.""" """Event handler called when widget is added to the app."""
@ -145,24 +163,32 @@ class MiscDisplay(Static):
"""Method to update the temp/fan values to current measurements. """Method to update the temp/fan values to current measurements.
Run by a timer created 'on_mount'""" Run by a timer created 'on_mount'"""
self.fan_stats = get_fan_stats() self.fan_rpm = get_fan_rpm(CARD)
self.temp_stats = get_temp_stats() self.fan_rpm_target = get_fan_target(CARD)
self.temp_stats = get_temp_stats(CARD)
def watch_fan_stats(self, fan_stats: dict) -> None: def watch_fan_rpm(self, fan_rpm: int) -> None:
"""Called when the 'fan_stats' reactive attr changes. """Called when the 'fan_rpm' reactive attr changes.
- Updates label values - Updates label values
- Casting inputs to string to avoid type problems w/ int/None""" - Casting inputs to string to avoid type problems w/ int/None"""
self.query_one("#fan_rpm", Static).update(f"{fan_stats['fan_rpm']}") self.query_one("#fan_rpm", Static).update(f"{fan_rpm}")
self.query_one("#fan_rpm_target", Static).update(f"{fan_stats['fan_rpm_target']}")
def watch_fan_rpm_target(self, fan_rpm_target: int) -> None:
"""Called when the 'fan_rpm_target' reactive attr changes.
- Updates label values
- Casting inputs to string to avoid type problems w/ int/None"""
self.query_one("#fan_rpm_target", Static).update(f"{fan_rpm_target}")
def watch_temp_stats(self, temp_stats: dict) -> None: def watch_temp_stats(self, temp_stats: dict) -> None:
"""Called when the temp_stats reactive attr changes, updates labels""" """Called when the temp_stats reactive attr changes, updates labels"""
for temp_node in TEMP_FILES: # try to avoid racing
# check first if the reactive object has been updated with keys if not self.composed:
if temp_node in temp_stats: return
stat_dict_item = temp_stats[temp_node] for temp_node in temp_stats:
self.query_one("#temp_" + temp_node, Static).update(f'{stat_dict_item}C') item_val = self.temp_stats[temp_node]
self.query_one("#temp_" + temp_node, Static).update(f'{item_val}C')
class ClockDisplay(Static): class ClockDisplay(Static):
@ -180,7 +206,8 @@ class ClockDisplay(Static):
Label("", id="clk_core_val", classes="statvalue")) Label("", id="clk_core_val", classes="statvalue"))
yield Horizontal(Label(" Memory:"), yield Horizontal(Label(" Memory:"),
Label("", id="clk_memory_val", classes="statvalue")) Label("", id="clk_memory_val", classes="statvalue"))
yield Horizontal(Label(""), Label("", classes="statvalue")) # padding to split groups # padding to split groups
yield Horizontal(Label(""), Label("", classes="statvalue"))
yield Horizontal(Label("[underline]Core"), yield Horizontal(Label("[underline]Core"),
Label("", classes="statvalue")) Label("", classes="statvalue"))
yield Horizontal(Label(" Utilization:",), yield Horizontal(Label(" Utilization:",),
@ -195,7 +222,7 @@ class ClockDisplay(Static):
def update_core_vals(self) -> None: def update_core_vals(self) -> None:
"""Method to update GPU clock values to the current measurements. """Method to update GPU clock values to the current measurements.
Run by a timer created 'on_mount'""" Run by a timer created 'on_mount'"""
self.core_vals = get_core_stats() self.core_vals = get_core_stats(CARD)
def watch_core_vals(self, core_vals: dict) -> None: def watch_core_vals(self, core_vals: dict) -> None:
"""Called when the clocks attribute changes """Called when the clocks attribute changes
@ -214,21 +241,22 @@ class ClockDisplay(Static):
class PowerDisplay(Static): class PowerDisplay(Static):
"""A widget to display GPU power stats.""" """A widget to display GPU power stats."""
micro_watts = reactive({"limit": 0, watts = reactive({"limit": 0,
"average": 0, "average": 0,
"capability": 0, "capability": 0,
"default": 0}) "default": 0})
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.timer_micro_watts = None self.timer_watts = None
def compose(self) -> ComposeResult: def compose(self) -> ComposeResult:
yield Horizontal(Label("[underline]Power"), yield Horizontal(Label("[underline]Power"),
Label("", classes="statvalue")) Label("", classes="statvalue"))
yield Horizontal(Label(" Usage:",), yield Horizontal(Label(" Usage:",),
Label("", id="pwr_avg_val", classes="statvalue")) Label("", id="pwr_avg_val", classes="statvalue"))
yield Horizontal(Label(""), Label("", classes="statvalue")) # padding to split groups # padding to split groups
yield Horizontal(Label(""), Label("", classes="statvalue"))
yield Horizontal(Label("[underline]Limits"), yield Horizontal(Label("[underline]Limits"),
Label("", classes="statvalue")) Label("", classes="statvalue"))
yield Horizontal(Label(" Configured:",), yield Horizontal(Label(" Configured:",),
@ -240,31 +268,32 @@ class PowerDisplay(Static):
def on_mount(self) -> None: def on_mount(self) -> None:
"""Event handler called when widget is added to the app.""" """Event handler called when widget is added to the app."""
self.timer_micro_watts = self.set_interval(1, self.update_micro_watts) self.timer_watts = self.set_interval(1, self.update_watts)
def update_micro_watts(self) -> None: def update_watts(self) -> None:
"""Method to update GPU power values to current measurements. """Method to update GPU power values to current measurements.
Run by a timer created 'on_mount'""" Run by a timer created 'on_mount'"""
self.micro_watts = get_power_stats() self.watts = get_power_stats(CARD)
def watch_micro_watts(self, micro_watts: dict) -> None: def watch_watts(self, watts: dict) -> None:
"""Called when the micro_watts attributes change. """Called when the 'watts' reactive attribute (var) changes.
- Updates label values - Updates label values
- Casting inputs to string to avoid type problems w/ int/None""" - Casting inputs to string to avoid type problems w/ int/None"""
self.query_one("#pwr_avg_val", self.query_one("#pwr_avg_val",
Static).update(f"{micro_watts['average']}W") Static).update(f"{watts['average']}W")
self.query_one("#pwr_lim_val", self.query_one("#pwr_lim_val",
Static).update(f"{micro_watts['limit']}W") Static).update(f"{watts['limit']}W")
self.query_one("#pwr_def_val", self.query_one("#pwr_def_val",
Static).update(f"{micro_watts['default']}W") Static).update(f"{watts['default']}W")
self.query_one("#pwr_cap_val", self.query_one("#pwr_cap_val",
Static).update(f"{micro_watts['capability']}W") Static).update(f"{watts['capability']}W")
def tui() -> None: def tui() -> None:
'''Spawns the textual UI only during CLI invocation / after argparse''' '''Spawns the textual UI only during CLI invocation / after argparse'''
if CARD is None: if len(AMDGPU_CARDS) > 0:
app = GPUStats()
app.run()
else:
sys.exit('Could not find an AMD GPU, exiting.') sys.exit('Could not find an AMD GPU, exiting.')
app = GPUStats()
app.run()

View file

@ -8,85 +8,64 @@ Variables:
- hwmon_dir: the `hwmon` interface (dir) that provides stats for this card - hwmon_dir: the `hwmon` interface (dir) that provides stats for this card
- SRC_FILES: dictionary of the known stats from the items in `hwmon_dir` - SRC_FILES: dictionary of the known stats from the items in `hwmon_dir`
- TEMP_FILES: dictionary of the *discovered* temperature nodes / stat files - TEMP_FILES: dictionary of the *discovered* temperature nodes / stat files
- POWER_DOMAINS: tuple of supported power domains: `average`, `limit`, `cap`, and `default`
- CLOCK_DOMAINS: tuple of supported clock domains: `core`, `memory` - CLOCK_DOMAINS: tuple of supported clock domains: `core`, `memory`
""" """
# disable superfluous linting # disable superfluous linting
# pylint: disable=line-too-long # pylint: disable=line-too-long
from os import path from os import path
import glob import glob
from typing import Tuple, Optional, Union from typing import Optional, Union
from humanfriendly import format_size from humanfriendly import format_size
def find_card() -> Optional[Tuple[Optional[str], Optional[str]]]: def find_cards() -> dict:
"""Searches contents of /sys/class/drm/card*/device/hwmon/hwmon*/name """Searches contents of `/sys/class/drm/card*/device/hwmon/hwmon*/name`
... looking for 'amdgpu' to find a card to monitor Reads 'hwmon' names looking for 'amdgpu' to find cards to monitor.
If no AMD GPU found, this will be: (None, None) If device(s) found, returns a dictionary of cards with their hwmon directories.
If *none* found, this will be an empty dict.
Returns: Returns:
tuple: ('cardN', '/hwmon/directory/with/stat/files') dict: `{'cardN': '/hwmon/directory/with/stat/files', 'cardY': '/other/hwmon/directory/for/cardY'}`
""" """
_card = None cards = {}
_hwmon_dir = None card_glob_pattern = '/sys/class/drm/card*/device/hwmon/hwmon*/name'
hwmon_names_glob = '/sys/class/drm/card*/device/hwmon/hwmon*/name' hwmon_names = glob.glob(card_glob_pattern)
hwmon_names = glob.glob(hwmon_names_glob)
for hwmon_name_file in hwmon_names: for hwmon_name_file in hwmon_names:
with open(hwmon_name_file, "r", encoding="utf-8") as _f: with open(hwmon_name_file, "r", encoding="utf-8") as _f:
if _f.read().strip() == 'amdgpu': if _f.read().strip() == 'amdgpu':
# found an amdgpu # found an amdgpu
# note: if multiple are found, last will be used/watched
# will be configurable in the future, may prompt
_card = hwmon_name_file.split('/')[4] _card = hwmon_name_file.split('/')[4]
_hwmon_dir = path.dirname(hwmon_name_file) _hwmon_dir = path.dirname(hwmon_name_file)
return _card, _hwmon_dir cards[_card] = _hwmon_dir
return cards
# base vars: card identifier, hwmon directory for stats, then the stat dicts # discover all available AMD GPUs
CARD, hwmon_dir = find_card() AMDGPU_CARDS = find_cards()
if CARD is not None: # supported clock domains by 'get_clock' func
card_dir = path.join("/sys/class/drm/", CARD) # eg: /sys/class/drm/card0/ # each domain selects a hwmon clock file in 'get_clock': 'core' -> freq1_input, 'memory' -> freq2_input
CLOCK_DOMAINS = ('core', 'memory')
# dictionary of known source files # defined outside/globally for efficiency -- it's called a lot in the TUI
# ref: https://docs.kernel.org/gpu/amdgpu/thermal.html
SRC_FILES = {'pwr_limit': path.join(hwmon_dir, "power1_cap"),
'pwr_average': path.join(hwmon_dir, "power1_average"),
'pwr_cap': path.join(hwmon_dir, "power1_cap_max"),
'pwr_default': path.join(hwmon_dir, "power1_cap_default"),
'core_clock': path.join(hwmon_dir, "freq1_input"),
'core_voltage': path.join(hwmon_dir, "in0_input"),
'memory_clock': path.join(hwmon_dir, "freq2_input"),
'busy_pct': path.join(card_dir, "device/gpu_busy_percent"),
'temp_c': path.join(hwmon_dir, "temp1_input"),
'fan_rpm': path.join(hwmon_dir, "fan1_input"),
'fan_rpm_target': path.join(hwmon_dir, "fan1_target"),
}
# determine temperature nodes, construct a dict to store them
# interface will iterate over these, creating labels as needed
TEMP_FILES = {}
temp_node_labels = glob.glob(path.join(hwmon_dir, "temp*_label"))
for temp_node_label_file in temp_node_labels:
# determine the base node id, eg: temp1
# construct the path to the file that will label it. ie: edge/junction
temp_node_id = path.basename(temp_node_label_file).split('_')[0]
temp_node_value_file = path.join(hwmon_dir, f"{temp_node_id}_input")
with open(temp_node_label_file, 'r', encoding='utf-8') as _node:
temp_node_name = _node.read().strip()
# add the node name/type and the corresponding temp file to the dict
TEMP_FILES[temp_node_name] = temp_node_value_file
def read_stat(file: str) -> str: def read_stat(file: str, stat_type: Optional[str] = None) -> str:
"""Read statistic `file`, return the stripped contents """Read statistic `file`, return the stripped contents
Args:
file (str): The statistic file to read/return
stat_type (str): Optional type, if specified - can convert data.
Returns: Returns:
str: Statistics from `file`""" str: Statistics from `file`. If `stat_type='power'`, will convert microwatts (µW) to Watts, returned as an int"""
with open(file, "r", encoding="utf-8") as _fh: with open(file, "r", encoding="utf-8") as _fh:
data = _fh.read() data = _fh.read().strip()
return data.strip() if stat_type == 'power':
data = int(int(data) / 1000000)
return data
def format_frequency(frequency_hz: int) -> str: def format_frequency(frequency_hz: int) -> str:
@ -103,47 +82,35 @@ def format_frequency(frequency_hz: int) -> str:
) )
def get_power_stats() -> dict: def get_power_stats(card: str) -> dict:
""" """
Args:
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
Returns: Returns:
dict: A dictionary of current GPU *power* related statistics. dict: A dictionary of current GPU *power* related statistics.
Example: Example:
`{'limit': int, 'average': int, 'capability': int, 'default': int}` `{'limit': int, 'average': int, 'capability': int, 'default': int}`
""" """
return {"limit": get_gpu_power('limit'), if card in AMDGPU_CARDS:
"average": get_gpu_power('average'), hwmon_dir = AMDGPU_CARDS[card]
"capability": get_gpu_power('cap'), else:
"default": get_gpu_power('default')} if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
return {"limit": read_stat(path.join(hwmon_dir, "power1_cap"), stat_type='power'),
"average": read_stat(path.join(hwmon_dir, "power1_average"), stat_type='power'),
"capability": read_stat(path.join(hwmon_dir, "power1_cap_max"), stat_type='power'),
"default": read_stat(path.join(hwmon_dir, "power1_cap_default"), stat_type='power')}
# constant; supported power domains by 'get_gpu_power' func def get_core_stats(card: str) -> dict:
# is concatenated with 'pwr_' to index SRC_FILES for the relevant data file
POWER_DOMAINS = ('limit', 'average', 'cap', 'default')
# defined outside/globally for efficiency -- it's called a lot in the TUI
def get_gpu_power(domain: str) -> int:
""" """
Args: Args:
domain (str): The GPU domain of interest regarding power card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
Must be one of POWER_DOMAINS:
- limit: the effective limit placed on the card
- default: the default limit
- average: the average consumption
- cap: the board capability
Returns:
int: The requested GPU power statistic by domain, in Watts
"""
if domain not in POWER_DOMAINS:
raise ValueError(f"Invalid power domain: '{domain}'. Must be one of: {POWER_DOMAINS}")
return int(int(read_stat(SRC_FILES['pwr_' + domain])) / 1000000)
def get_core_stats() -> dict:
"""
Returns: Returns:
dict: A dictionary of current GPU *core/memory* related statistics. dict: A dictionary of current GPU *core/memory* related statistics.
@ -152,21 +119,22 @@ def get_core_stats() -> dict:
Example: Example:
`{'sclk': int, 'mclk': int, 'voltage': float, 'util_pct': int}` `{'sclk': int, 'mclk': int, 'voltage': float, 'util_pct': int}`
""" """
return {"sclk": get_clock('core'), # verify card -- is it AMD, do we know the hwmon directory?
"mclk": get_clock('memory'), if card in AMDGPU_CARDS:
"voltage": get_voltage(), return {"sclk": get_clock(card, 'core'),
"util_pct": get_gpu_usage()} "mclk": get_clock(card, 'memory'),
"voltage": get_voltage(card),
"util_pct": get_gpu_usage(card)}
if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
# constant; supported clock domains by 'get_clock' func def get_clock(card: str, domain: str, format_freq: bool = False) -> Union[int, str]:
# is concatenated with 'clock_' to index SRC_FILES for the relevant data file
CLOCK_DOMAINS = ('core', 'memory')
# defined outside/globally for efficiency -- it's called a lot in the TUI
def get_clock(domain: str, format_freq: bool = False) -> Union[int, str]:
""" """
Args: Args:
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
domain (str): The GPU domain of interest regarding clock speed. domain (str): The GPU domain of interest regarding clock speed.
Must be one of CLOCK_DOMAINS Must be one of CLOCK_DOMAINS
@ -178,59 +146,102 @@ def get_clock(domain: str, format_freq: bool = False) -> Union[int, str]:
If format_freq is True, a formatted string with Hz/MHz/GHz If format_freq is True, a formatted string with Hz/MHz/GHz
will be returned instead of an int will be returned instead of an int
""" """
# verify card -- is it AMD, do we know the hwmon directory?
if card in AMDGPU_CARDS:
hwmon_dir = AMDGPU_CARDS[card]
else:
if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
if domain not in CLOCK_DOMAINS: if domain not in CLOCK_DOMAINS:
raise ValueError(f"Invalid clock domain: '{domain}'. Must be one of: {CLOCK_DOMAINS}") raise ValueError(f"Invalid clock domain: '{domain}'. Must be one of: {CLOCK_DOMAINS}")
# set the clock file based on requested domain
if domain == 'core':
clock_file = path.join(hwmon_dir, "freq1_input")
elif domain == 'memory':
clock_file = path.join(hwmon_dir, "freq2_input")
# handle output processing
if format_freq: if format_freq:
return format_frequency(read_stat(SRC_FILES[domain + '_clock'])) return format_frequency(int(read_stat(clock_file)))
return int(read_stat(SRC_FILES[domain + '_clock'])) return int(read_stat(clock_file))
def get_voltage() -> float: def get_voltage(card: str) -> float:
""" """
Args:
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
Returns: Returns:
float: The current GPU core voltage float: The current GPU core voltage
""" """
return round(int(read_stat(SRC_FILES['core_voltage'])) / 1000.0, 2) # verify card -- is it AMD, do we know the hwmon directory?
if card in AMDGPU_CARDS:
hwmon_dir = AMDGPU_CARDS[card]
else:
if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
return round(int(read_stat(path.join(hwmon_dir, "in0_input"))) / 1000.0, 2)
def get_fan_stats() -> dict: def get_fan_rpm(card: str) -> int:
""" """
Returns: Args:
dict: A dictionary of current GPU *fan* related statistics. card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
Example:
`{'fan_rpm': int, 'fan_rpm_target': int}`
"""
return {"fan_rpm": get_fan_rpm(),
"fan_rpm_target": get_fan_target()}
def get_fan_rpm() -> int:
"""
Returns: Returns:
int: The current fan RPM int: The current fan RPM
""" """
return int(read_stat(SRC_FILES['fan_rpm'])) # verify card -- is it AMD, do we know the hwmon directory?
if card in AMDGPU_CARDS:
hwmon_dir = AMDGPU_CARDS[card]
else:
if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
return int(read_stat(path.join(hwmon_dir, "fan1_input")))
def get_fan_target() -> int: def get_fan_target(card: str) -> int:
""" """
Args:
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
Returns: Returns:
int: The current target fan RPM int: The current target fan RPM
""" """
return int(read_stat(SRC_FILES['fan_rpm_target'])) # verify card -- is it AMD, do we know the hwmon directory?
if card in AMDGPU_CARDS:
hwmon_dir = AMDGPU_CARDS[card]
else:
if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
return int(read_stat(path.join(hwmon_dir, "fan1_target")))
def get_gpu_usage() -> int: def get_gpu_usage(card: str) -> int:
""" """
Args:
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
Returns: Returns:
int: The current GPU usage/utilization as a percentage int: The current GPU usage/utilization as a percentage
""" """
return int(read_stat(SRC_FILES['busy_pct'])) if card in AMDGPU_CARDS:
stat_file = path.join("/sys/class/drm/", card, "device/gpu_busy_percent")
else:
if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
return int(read_stat(stat_file))
def get_temp_stats() -> dict: def get_temp_stats(card: str) -> dict:
""" """
Args:
card (str): Card identifier from `/dev/dri/`, ie: `card0`. See `AMDGPU_CARDS` or `find_cards()`
Returns: Returns:
dict: A dictionary of current GPU *temperature* related statistics. dict: A dictionary of current GPU *temperature* related statistics.
@ -243,8 +254,28 @@ def get_temp_stats() -> dict:
Returned values are converted to C, as integers for simple comparison Returned values are converted to C, as integers for simple comparison
""" """
if card in AMDGPU_CARDS:
hwmon_dir = AMDGPU_CARDS[card]
else:
if len(AMDGPU_CARDS) > 0:
raise ValueError(f"Invalid card: '{card}'. Must be one of: {list(AMDGPU_CARDS.keys())}")
raise ValueError(f"Invalid card: '{card}', no AMD GPUs or hwmon directories found")
# determine temperature nodes, construct a dict to store them
# interface will iterate over these, creating labels as needed
temp_files = {}
temp_node_labels = glob.glob(path.join(hwmon_dir, "temp*_label"))
for temp_node_label_file in temp_node_labels:
# determine the base node id, eg: temp1
# construct the path to the file that will label it. ie: edge/junction
temp_node_id = path.basename(temp_node_label_file).split('_')[0]
temp_node_value_file = path.join(hwmon_dir, f"{temp_node_id}_input")
with open(temp_node_label_file, 'r', encoding='utf-8') as _node:
temp_node_name = _node.read().strip()
# add the node name/type and the corresponding temp file to the dict
temp_files[temp_node_name] = temp_node_value_file
temp_update = {} temp_update = {}
for temp_node, temp_file in TEMP_FILES.items(): for temp_node, temp_file in temp_files.items():
# iterate through the discovered temperature nodes # iterate through the discovered temperature nodes
# ... updating the dictionary with new stats # ... updating the dictionary with new stats
_temperature = int(int(read_stat(temp_file)) // 1000) _temperature = int(int(read_stat(temp_file)) // 1000)