Merge branch 'tweak/host-stats-exceptions' into 'dev'

Add better error logging for statistic collection

See merge request crafty-controller/crafty-4!359
This commit is contained in:
Iain Powrie 2022-06-17 23:38:52 +00:00
commit 990f038a04
2 changed files with 43 additions and 33 deletions

View File

@@ -9,12 +9,12 @@
TBD TBD
### Tweaks ### Tweaks
TBD - Add better error logging for statistic collection ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/359))
## [4.0.2-hotfix1] - 2022/06/17 ## [4.0.2-hotfix1] - 2022/06/17
### Crit Bug fixes ### Crit Bug fixes
Fix blank server_detail page for general users ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/358)) - Fix blank server_detail page for general users ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/358))
## [4.0.2] - 2022/06/16 ## [4.0.2] - 2022/06/16

View File

@@ -63,7 +63,9 @@ class Stats:
psutil.boot_time(), datetime.timezone.utc psutil.boot_time(), datetime.timezone.utc
) )
except Exception as e: except Exception as e:
logger.debug(f"error while getting boot time due to {e}") logger.debug(
"getting boot time failed due to the following error:", exc_info=e
)
# unix epoch with no timezone data # unix epoch with no timezone data
return datetime.datetime.fromtimestamp(0, datetime.timezone.utc) return datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
@@ -72,7 +74,9 @@ class Stats:
try: try:
return psutil.cpu_percent(interval=0.5) / psutil.cpu_count() return psutil.cpu_percent(interval=0.5) / psutil.cpu_count()
except Exception as e: except Exception as e:
logger.debug(f"error while getting cpu percentage due to {e}") logger.debug(
"getting the cpu usage failed due to the following error:", exc_info=e
)
return -1 return -1
def __init__(self, helper, controller): def __init__(self, helper, controller):
@@ -100,7 +104,9 @@ class Stats:
"disk_data": Stats._try_all_disk_usage(), "disk_data": Stats._try_all_disk_usage(),
} }
except Exception as e: except Exception as e:
logger.debug(f"error while getting host stats due to {e}") logger.debug(
"getting host stats failed due to the following error:", exc_info=e
)
node_stats: NodeStatsDict = { node_stats: NodeStatsDict = {
"boot_time": str( "boot_time": str(
datetime.datetime.fromtimestamp(0, datetime.timezone.utc) datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
@@ -128,46 +134,43 @@ class Stats:
try: try:
return Stats._get_process_stats(process) return Stats._get_process_stats(process)
except Exception as e: except Exception as e:
logger.debug(f"error while getting process stats due to {e}") logger.debug(
f"getting process stats for pid {process.pid} "
"failed due to the following error:",
exc_info=e,
)
return {"cpu_usage": -1, "memory_usage": -1, "mem_percentage": -1} return {"cpu_usage": -1, "memory_usage": -1, "mem_percentage": -1}
@staticmethod @staticmethod
def _get_process_stats(process): def _get_process_stats(process):
if process is None: if process is None:
return {"cpu_usage": 0, "memory_usage": 0, "mem_percentage": 0} return {"cpu_usage": -1, "memory_usage": -1, "mem_percentage": -1}
process_pid = process.pid process_pid = process.pid
try: p = psutil.Process(process_pid)
p = psutil.Process(process_pid) _dummy = p.cpu_percent()
dummy = p.cpu_percent()
# call it first so we can be more accurate per the docs # call it first so we can be more accurate per the docs
# https://giamptest.readthedocs.io/en/latest/#psutil.Process.cpu_percent # https://giamptest.readthedocs.io/en/latest/#psutil.Process.cpu_percent
real_cpu = round(p.cpu_percent(interval=0.5) / psutil.cpu_count(), 2) real_cpu = round(p.cpu_percent(interval=0.5) / psutil.cpu_count(), 2)
# this is a faster way of getting data for a process # this is a faster way of getting data for a process
with p.oneshot(): with p.oneshot():
process_stats = { process_stats = {
"cpu_usage": real_cpu, "cpu_usage": real_cpu,
"memory_usage": Helpers.human_readable_file_size( "memory_usage": Helpers.human_readable_file_size(p.memory_info()[0]),
p.memory_info()[0] "mem_percentage": round(p.memory_percent(), 0),
), }
"mem_percentage": round(p.memory_percent(), 0), return process_stats
}
return process_stats
except Exception as e:
logger.error(
f"Unable to get process details for pid: {process_pid} Error: {e}"
)
return {"cpu_usage": 0, "memory_usage": 0, "mem_percentage": 0}
@staticmethod @staticmethod
def _try_all_disk_usage(): def _try_all_disk_usage():
try: try:
return Stats._all_disk_usage() return Stats._all_disk_usage()
except Exception as e: except Exception as e:
logger.debug(f"error while getting disk data due to {e}") logger.debug(
"getting disk stats failed due to the following error:", exc_info=e
)
return [] return []
# Source: https://github.com/giampaolo/psutil/blob/master/scripts/disk_usage.py # Source: https://github.com/giampaolo/psutil/blob/master/scripts/disk_usage.py
@@ -246,14 +249,19 @@ class Stats:
online_stats = json.loads(ping_obj.players) online_stats = json.loads(ping_obj.players)
except Exception as e: except Exception as e:
logger.info(f"Unable to read json from ping_obj: {e}") logger.info(
"Unable to read json from ping_obj due to the following error:",
exc_info=e,
)
try: try:
server_icon = base64.encodebytes(ping_obj.icon) server_icon = base64.encodebytes(ping_obj.icon)
server_icon = server_icon.decode("utf-8") server_icon = server_icon.decode("utf-8")
except Exception as e: except Exception as e:
server_icon = False server_icon = False
logger.info(f"Unable to read the server icon : {e}") logger.info(
"Unable to read the server icon due to the following error:", exc_info=e
)
ping_data = { ping_data = {
"online": online_stats.get("online", 0), "online": online_stats.get("online", 0),
@@ -273,7 +281,9 @@ class Stats:
server_icon = base64.encodebytes(ping_obj["icon"]) server_icon = base64.encodebytes(ping_obj["icon"])
except Exception as e: except Exception as e:
server_icon = False server_icon = False
logger.info(f"Unable to read the server icon : {e}") logger.info(
"Unable to read the server icon due to the following error:", exc_info=e
)
ping_data = { ping_data = {
"online": ping_obj["server_player_count"], "online": ping_obj["server_player_count"],
"max": ping_obj["server_player_max"], "max": ping_obj["server_player_max"],