Resolves wookayin#19 Add option power stats

cjw85 · Sep 19, 2017 · be1330e · be1330e
1 parent a38bc5f
commit be1330e
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 9 deletions.
diff --git a/gpustat.py b/gpustat.py
@@ -113,6 +113,24 @@ def utilization(self):
         v = self.entry['utilization.gpu']
         return int(v) if v is not None else None
 
+    @property
+    def power_usage(self):
+        """
+        Returns the GPU power usage in Watts,
+        or None if the information is not available.
+        """
+        v = self.entry['power.use']
+        return int(v) if v is not None else None
+
+    @property
+    def power_limit(self):
+        """
+        Returns the (enforced) GPU power limit in Watts,
+        or None if the information is not available.
+        """
+        v = self.entry['power.limit']
+        return int(v) if v is not None else None
+
     @property
     def processes(self):
         """
@@ -126,6 +144,7 @@ def print_to(self, fp,
                  show_cmd=False,
                  show_user=False,
                  show_pid=False,
+                 show_power=False,
                  gpuname_width=16,
                  term=Terminal(),
                  ):
@@ -150,6 +169,8 @@ def _conditional(cond_fn, true_value, false_value,
         colors['CUser'] = term.bold_black   # gray
         colors['CUtil'] = _conditional(lambda: int(self.entry['utilization.gpu']) < 30,
                                        term.green, term.bold_green)
+        colors['CPowU'] = term.bold_red
+        colors['CPowL'] = term.red
 
         if not with_colors:
             for k in list(colors.keys()):
@@ -160,10 +181,14 @@ def _repr(v, none_value='??'):
             else: return str(v)
 
         # build one-line display information
-        reps = ("%(C1)s[{entry[index]}]%(C0)s %(CName)s{entry[name]:{gpuname_width}}%(C0)s |" +
-                "%(CTemp)s{entry[temperature.gpu]:>3}'C%(C0)s, %(CUtil)s{entry[utilization.gpu]:>3} %%%(C0)s | " +
-                "%(C1)s%(CMemU)s{entry[memory.used]:>5}%(C0)s / %(CMemT)s{entry[memory.total]:>5}%(C0)s MB"
-                ) % colors
+        # power usage is optional, but when shown it belongs with temperature and utilization
+        reps = "%(C1)s[{entry[index]}]%(C0)s %(CName)s{entry[name]:{gpuname_width}}%(C0)s |" \
+               "%(CTemp)s{entry[temperature.gpu]:>3}'C%(C0)s, %(CUtil)s{entry[utilization.gpu]:>3} %%%(C0)s"
+
+        if show_power:
+            reps += ",  %(CPowU)s{entry[power.use]:>3}%(C0)s / %(CPowL)s{entry[power.limit]:>3}%(C0)s W" 
+        reps += " | %(C1)s%(CMemU)s{entry[memory.used]:>5}%(C0)s / %(CMemT)s{entry[memory.total]:>5}%(C0)s MB"
+        reps = (reps) % colors
         reps = reps.format(entry={k: _repr(v) for (k, v) in self.entry.items()},
                            gpuname_width=gpuname_width)
         reps += " |"
@@ -252,6 +277,16 @@ def _decode(b):
             except N.NVMLError:
                 utilization = None  # Not supported
 
+            try:
+                power = N.nvmlDeviceGetPowerUsage(handle)
+            except:
+                power = None
+
+            try:
+                power_limit = N.nvmlDeviceGetEnforcedPowerLimit(handle)
+            except:
+                power_limit = None
+
             processes = []
             try:
                 nv_comp_processes = N.nvmlDeviceGetComputeRunningProcesses(handle)
@@ -284,6 +319,8 @@ def _decode(b):
                 'name': name,
                 'temperature.gpu': temperature,
                 'utilization.gpu': utilization.gpu if utilization else None,
+                'power.use': int(power / 1000) if power is not None else None,
+                'power.limit': int(power_limit / 1000) if power is not None else None,
                 # Convert bytes into MBytes
                 'memory.used': int(memory.used / 1024 / 1024) if memory else None,
                 'memory.total': int(memory.total / 1024 / 1024) if memory else None,
@@ -323,7 +360,7 @@ def __repr__(self):
 
     def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
                         show_cmd=False, show_user=False, show_pid=False,
-                        gpuname_width=16,
+                        show_power=False, gpuname_width=16,
                         ):
         # ANSI color configuration
         if force_color and no_color:
@@ -355,6 +392,7 @@ def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
                        show_cmd=show_cmd,
                        show_user=show_user,
                        show_pid=show_pid,
+                       show_power=show_power,
                        gpuname_width=gpuname_width,
                        term=t_color)
             fp.write('\n')
@@ -430,6 +468,8 @@ def main():
                         help='Display username of running process')
     parser.add_argument('-p', '--show-pid', action='store_true',
                         help='Display PID of running process')
+    parser.add_argument('-P', '--show-power', action='store_true',
+                        help='Show GPU power usage (and limit)')
     parser.add_argument('--gpuname-width', type=int, default=16,
                         help='The minimum column width of GPU names, defaults to 16')
     parser.add_argument('--json', action='store_true', default=False,

diff --git a/test_gpustat.py b/test_gpustat.py
@@ -72,6 +72,18 @@ def _decorated(*args, **kwargs):
         mock_handles[2]: 71,
     }.get(handle, RuntimeError))
 
+    N.nvmlDeviceGetPowerUsage = _raise_ex(lambda handle: {
+        mock_handles[0]: 125000,
+        mock_handles[1]: 100000,
+        mock_handles[2]: 250000,
+    }.get(handle, RuntimeError))
+
+    N.nvmlDeviceGetEnforcedPowerLimit = _raise_ex(lambda handle: {
+        mock_handles[0]: 250000,
+        mock_handles[1]: 250000,
+        mock_handles[2]: 250000,
+    }.get(handle, RuntimeError))
+
     mock_memory_t = namedtuple("Memory_t", ['total', 'used'])
     N.nvmlDeviceGetMemoryInfo.side_effect = _raise_ex(lambda handle: {
         mock_handles[0]: mock_memory_t(total=12883853312, used=8000*MB),
@@ -147,7 +159,7 @@ def test_new_query_mocked(self, N, Process):
 
         gpustats = gpustat.new_query()
         fp = StringIO()
-        gpustats.print_formatted(fp=fp, no_color=False, show_user=True, show_cmd=True, show_pid=True)
+        gpustats.print_formatted(fp=fp, no_color=False, show_user=True, show_cmd=True, show_pid=True, show_power=True)
 
         result = fp.getvalue()
         print(result)
@@ -157,9 +169,9 @@ def test_new_query_mocked(self, N, Process):
         unescaped = '\n'.join(unescaped.split('\n')[1:])
 
         expected = """\
-[0] GeForce GTX TITAN 0 | 80'C,  76 % |  8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
-[1] GeForce GTX TITAN 1 | 36'C,   0 % |  9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
-[2] GeForce GTX TITAN 2 | 71'C,  ?? % |     0 / 12189 MB | (Not Supported)
+[0] GeForce GTX TITAN 0 | 80'C,  76 %,  125 / 250 W |  8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
+[1] GeForce GTX TITAN 1 | 36'C,   0 %,  100 / 250 W |  9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
+[2] GeForce GTX TITAN 2 | 71'C,  ?? %,  250 / 250 W |     0 / 12189 MB | (Not Supported)
 """
         self.maxDiff = 4096
         self.assertEqual(unescaped, expected)