Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Cleanups and sanity-checking in cpu and db metrics #5499

Merged
merged 7 commits into from
Jun 24, 2019
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions synapse/util/logcontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,9 @@ def stop(self):
)
return

usage_end = get_thread_resource_usage()

self._resource_usage.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime
self._resource_usage.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime
utime_delta, stime_delta = self._get_cputime()
self._resource_usage.ru_utime += utime_delta
self._resource_usage.ru_stime += stime_delta

self.usage_start = None

Expand All @@ -346,13 +345,38 @@ def get_resource_usage(self):
# can include resource usage so far.
is_main_thread = threading.current_thread() is self.main_thread
if self.alive and self.usage_start and is_main_thread:
current = get_thread_resource_usage()
res.ru_utime += current.ru_utime - self.usage_start.ru_utime
res.ru_stime += current.ru_stime - self.usage_start.ru_stime
utime_delta, stime_delta = self._get_cputime()
res.ru_utime += utime_delta
res.ru_stime += stime_delta

return res

def _get_cputime(self):
"""Get the cpu usage time so far

Returns: Tuple[float, float]: seconds in user mode, seconds in system mode
"""
current = get_thread_resource_usage()

utime_delta = current.ru_utime - self.usage_start.ru_utime
stime_delta = current.ru_stime - self.usage_start.ru_stime

# sanity check
if utime_delta < 0:
raise ValueError("utime went backwards! %f < %f" % (
current.ru_utime, self.usage_start.ru_utime,
))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the effect of raising here? Will this just take out metrics? Do we want to log.error but return 0 instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it will take out whatever is running at this point.

My concern with log.error is that it's never going to get noticed, and if it is, we won't be able to figure out why it is happening.


if stime_delta < 0:
raise ValueError("stime went backwards! %f < %f" % (
current.ru_stime, self.usage_start.ru_stime,
))

return utime_delta, stime_delta

def add_database_transaction(self, duration_sec):
if duration_sec < 0:
raise ValueError('DB txn time can only be non-negative')
self._resource_usage.db_txn_count += 1
self._resource_usage.db_txn_duration_sec += duration_sec

Expand All @@ -363,6 +387,8 @@ def add_database_scheduled(self, sched_sec):
sched_sec (float): number of seconds it took us to get a
connection
"""
if sched_sec < 0:
raise ValueError('DB scheduling time can only be non-negative')
self._resource_usage.db_sched_duration_sec += sched_sec

def record_event_fetch(self, event_count):
Expand Down