From 005ac1216eff97fb1acc594114f273cef9b36d3c Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 27 Jan 2022 20:43:23 -0500 Subject: [PATCH] Add crash detection --- app/classes/controllers/servers_controller.py | 4 ++++ app/classes/models/servers.py | 24 +++++++++++++++++-- app/classes/shared/server.py | 22 ++++++++++++----- app/classes/shared/tasks.py | 3 +++ app/classes/web/panel_handler.py | 17 +++++++------ 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/app/classes/controllers/servers_controller.py b/app/classes/controllers/servers_controller.py index 24b19cf7..0e0599c5 100644 --- a/app/classes/controllers/servers_controller.py +++ b/app/classes/controllers/servers_controller.py @@ -146,6 +146,10 @@ class Servers_Controller: return True return False + @staticmethod + def is_crashed(server_id): + return servers_helper.is_crashed(server_id) + @staticmethod def server_id_authorized_api_key(server_id: str, api_key: ApiKeys) -> bool: # TODO diff --git a/app/classes/models/servers.py b/app/classes/models/servers.py index d29e903e..8ea820a5 100644 --- a/app/classes/models/servers.py +++ b/app/classes/models/servers.py @@ -73,6 +73,7 @@ class Server_Stats(Model): updating = BooleanField(default=False) waiting_start = BooleanField(default=False) first_run = BooleanField(default=True) + crashed = BooleanField(default=False) class Meta: @@ -178,6 +179,25 @@ class helper_servers: return False return True + @staticmethod + def sever_crashed(server_id): + with database.atomic(): + Server_Stats.update(crashed=True).where(Server_Stats.server_id == server_id).execute() + + @staticmethod + def server_crash_reset(server_id): + with database.atomic(): + Server_Stats.update(crashed=False).where(Server_Stats.server_id == server_id).execute() + + @staticmethod + def is_crashed(server_id): + svr = Server_Stats.select().where(Server_Stats.server_id == server_id).get() + #pylint: disable=singleton-comparison + if svr.crashed == True: + return True + else: + return False + @staticmethod def set_update(server_id, value): try: @@ -190,8 +210,8 @@ class helper_servers: @staticmethod def get_update_status(server_id): - waiting_start = Server_Stats.select().where(Server_Stats.server_id == server_id).get() - return waiting_start.waiting_start + update_status = Server_Stats.select().where(Server_Stats.server_id == server_id).get() + return update_status.updating @staticmethod def set_first_run(server_id): diff --git a/app/classes/shared/server.py b/app/classes/shared/server.py index f2ed7e49..423fe94d 100644 --- a/app/classes/shared/server.py +++ b/app/classes/shared/server.py @@ -115,8 +115,12 @@ class Server: self.stats = stats tz = get_localzone() self.server_scheduler = BackgroundScheduler(timezone=str(tz)) + self.server_scheduler.start() self.backup_thread = threading.Thread(target=self.a_backup_server, daemon=True, name=f"backup_{self.name}") self.is_backingup = False + #Reset crash and update at initialization + servers_helper.server_crash_reset(self.server_id) + servers_helper.set_update(self.server_id, False) def reload_server_settings(self): server_data = servers_helper.get_server_data_by_id(self.server_id) @@ -141,7 +145,6 @@ class Server: console.info(f"Scheduling server {self.name} to start in {delay} seconds") self.server_scheduler.add_job(self.run_scheduled_server, 'interval', seconds=delay, id=str(self.server_id)) - self.server_scheduler.start() def run_scheduled_server(self): console.info(f"Starting server ID: {self.server_id} - {self.name}") @@ -262,6 +265,7 @@ class Server: threading.Thread(target=out_buf.check, daemon=True, name=f'{self.server_id}_virtual_terminal').start() self.is_crashed = False + servers_helper.server_crash_reset(self.server_id) self.start_time = str(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')) @@ -269,6 +273,7 @@ class Server: logger.info(f"Server {self.name} running with PID {self.process.pid}") console.info(f"Server {self.name} running with PID {self.process.pid}") self.is_crashed = False + servers_helper.server_crash_reset(self.server_id) self.stats.record_stats() check_internet_thread = threading.Thread( target=self.check_internet_thread, daemon=True, args=(user_id, user_lang, ), name=f"{self.name}_Internet") @@ -299,7 +304,7 @@ class Server: logger.info(f"Server {self.name} has crash detection enabled - starting watcher task") console.info(f"Server {self.name} has crash detection enabled - starting watcher task") - self.server_scheduler.add_job(self.detect_crash, 'interval', seconds=30, id="c_{self.server_id}") + self.server_scheduler.add_job(self.detect_crash, 'interval', seconds=30, id=f"c_{self.server_id}") def check_internet_thread(self, user_id, user_lang): if user_id: @@ -317,6 +322,9 @@ class Server: def stop_server(self): if self.settings['stop_command']: self.send_command(self.settings['stop_command']) + #remove crash detection watcher + logger.info(f"Removing crash watcher for server {self.name}") + self.server_scheduler.remove_job('c_' + str(self.server_id)) else: #windows will need to be handled separately for Ctrl+C self.process.terminate() @@ -398,8 +406,9 @@ class Server: def crash_detected(self, name): + print("crash detected") # clear the old scheduled watcher task - self.remove_watcher_thread() + self.server_scheduler.remove_job("c_"+str(self.server_id)) # the server crashed, or isn't found - so let's reset things. logger.warning(f"The server {name} seems to have vanished unexpectedly, did it crash?") @@ -449,6 +458,7 @@ class Server: if running: return + servers_helper.sever_crashed(self.server_id) # if we haven't tried to restart more 3 or more times if self.restart_count <= 3: @@ -464,12 +474,12 @@ class Server: logger.critical(f"Server {self.name} has been restarted {self.restart_count} times. It has crashed, not restarting.") console.critical(f"Server {self.name} has been restarted {self.restart_count} times. It has crashed, not restarting.") - # set to 99 restart attempts so this elif is skipped next time. (no double logging) - self.restart_count = 99 + self.restart_count = 0 self.is_crashed = True + servers_helper.sever_crashed(self.server_id) # cancel the watcher task - self.remove_watcher_thread() + self.server_scheduler.remove_job("c_"+str(self.server_id)) def remove_watcher_thread(self): logger.info("Removing old crash detection watcher thread") diff --git a/app/classes/shared/tasks.py b/app/classes/shared/tasks.py index 03fadbea..8a2c6855 100644 --- a/app/classes/shared/tasks.py +++ b/app/classes/shared/tasks.py @@ -16,6 +16,7 @@ from app.classes.minecraft.serverjars import server_jar_obj from app.classes.models.management import management_helper from app.classes.controllers.users_controller import Users_Controller from app.classes.controllers.servers_controller import Servers_Controller +from app.classes.models.servers import servers_helper logger = logging.getLogger('apscheduler') @@ -454,6 +455,7 @@ class TasksManager: 'players': srv['raw_ping_result'].get('players'), 'desc': srv['raw_ping_result'].get('desc'), 'version': srv['raw_ping_result'].get('version'), + 'crashed': servers_helper.is_crashed(server_id), }) if len(websocket_helper.clients) > 0: websocket_helper.broadcast_user_page_params( @@ -478,6 +480,7 @@ class TasksManager: 'players': srv['raw_ping_result'].get('players'), 'desc': srv['raw_ping_result'].get('desc'), 'version': srv['raw_ping_result'].get('version'), + 'crashed': servers_helper.is_crashed(server_id), } ) total_players += int(srv['raw_ping_result'].get('online')) diff --git a/app/classes/web/panel_handler.py b/app/classes/web/panel_handler.py index 2f256917..3b4772b2 100644 --- a/app/classes/web/panel_handler.py +++ b/app/classes/web/panel_handler.py @@ -295,13 +295,15 @@ class PanelHandler(BaseHandler): list(filter(lambda x: x['stats']['running'], page_data['servers']))) page_data['server_stats']['stopped'] = len(page_data['servers']) - page_data['server_stats']['running'] - for data in page_data['servers']: - try: - data['stats']['waiting_start'] = self.controller.servers.get_waiting_start( - str(data['stats']['server_id']['server_id'])) - except Exception as e: - logger.error(f"Failed to get server waiting to start: {e}") - data['stats']['waiting_start'] = False + for data in page_data['servers']: + data['stats']['crashed'] = self.controller.servers.is_crashed( + str(data['stats']['server_id']['server_id'])) + try: + data['stats']['waiting_start'] = self.controller.servers.get_waiting_start( + str(data['stats']['server_id']['server_id'])) + except Exception as e: + logger.error(f"Failed to get server waiting to start: {e}") + data['stats']['waiting_start'] = False try: self.fetch_server_data(page_data) @@ -348,6 +350,7 @@ class PanelHandler(BaseHandler): 'Players': Enum_Permissions_Server.Players, } page_data['user_permissions'] = self.controller.server_perms.get_user_id_permissions_list(exec_user["user_id"], server_id) + page_data['server_stats']['crashed'] = self.controller.servers.is_crashed(server_id) if subpage == 'term': if not page_data['permissions']['Terminal'] in page_data['user_permissions']: