mirror of
https://gitlab.com/crafty-controller/crafty-4.git
synced 2024-08-30 18:23:09 +00:00
patches for issue #4:
biggest issue causing the cascade was building crash detection in check_running method that method is called everywhere, so when called (sometimes 5 times at the same time) it tries to restart the server over and over i created a new detect_crash method that will now look for crashes and removed all crash detection from check_running method also added the remove_watcher_thread method to remove the old scheduled task watching the older server.
This commit is contained in:
parent
688117ec87
commit
6b686c5b47
@ -44,6 +44,7 @@ class Server:
|
|||||||
self.name = None
|
self.name = None
|
||||||
self.is_crashed = False
|
self.is_crashed = False
|
||||||
self.restart_count = 0
|
self.restart_count = 0
|
||||||
|
self.crash_watcher_schedule = None
|
||||||
|
|
||||||
def reload_server_settings(self):
|
def reload_server_settings(self):
|
||||||
server_data = db_helper.get_server_data_by_id(self.server_id)
|
server_data = db_helper.get_server_data_by_id(self.server_id)
|
||||||
@ -168,8 +169,7 @@ class Server:
|
|||||||
logger.info("Server {} has crash detection enabled - starting watcher task".format(self.name))
|
logger.info("Server {} has crash detection enabled - starting watcher task".format(self.name))
|
||||||
console.info("Server {} has crash detection enabled - starting watcher task".format(self.name))
|
console.info("Server {} has crash detection enabled - starting watcher task".format(self.name))
|
||||||
|
|
||||||
# TODO: create crash detection watcher and such
|
self.crash_watcher_schedule = schedule.every(30).seconds.do(self.detect_crash).tag(self.name)
|
||||||
# schedule.every(30).seconds.do(self.check_running).tag(self.name)
|
|
||||||
|
|
||||||
def stop_threaded_server(self):
|
def stop_threaded_server(self):
|
||||||
self.stop_server()
|
self.stop_server()
|
||||||
@ -232,56 +232,21 @@ class Server:
|
|||||||
self.updating = False
|
self.updating = False
|
||||||
self.process = None
|
self.process = None
|
||||||
|
|
||||||
def check_running(self, shutting_down=False):
|
def check_running(self):
|
||||||
|
running = False
|
||||||
|
|
||||||
# if process is None, we never tried to start
|
# if process is None, we never tried to start
|
||||||
if self.PID is None:
|
if self.PID is None:
|
||||||
return False
|
return running
|
||||||
|
|
||||||
try:
|
try:
|
||||||
running = psutil.pid_exists(self.PID)
|
running = psutil.pid_exists(self.PID)
|
||||||
logger.info("Checking if PID: {} is running".format(self.PID))
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Unable to find if server PID exists: {}".format(self.PID))
|
logger.error("Unable to find if server PID exists: {}".format(self.PID))
|
||||||
running = False
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if not running:
|
return running
|
||||||
|
|
||||||
# did the server crash?
|
|
||||||
if not shutting_down:
|
|
||||||
|
|
||||||
# do we have crash detection turned on?
|
|
||||||
if self.settings['crash_detection']:
|
|
||||||
|
|
||||||
# if we haven't tried to restart more 3 or more times
|
|
||||||
if self.restart_count <= 3:
|
|
||||||
|
|
||||||
# start the server if needed
|
|
||||||
server_restarted = self.crash_detected(self.name)
|
|
||||||
|
|
||||||
if server_restarted:
|
|
||||||
# add to the restart count
|
|
||||||
self.restart_count = self.restart_count + 1
|
|
||||||
return False
|
|
||||||
|
|
||||||
# we have tried to restart 4 times...
|
|
||||||
elif self.restart_count == 4:
|
|
||||||
logger.warning("Server {} has been restarted {} times. It has crashed, not restarting.".format(
|
|
||||||
self.name, self.restart_count))
|
|
||||||
|
|
||||||
# set to 99 restart attempts so this elif is skipped next time. (no double logging)
|
|
||||||
self.restart_count = 99
|
|
||||||
self.is_crashed = True
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
self.is_crashed = True
|
|
||||||
return False
|
|
||||||
|
|
||||||
self.cleanup_server_object()
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def send_command(self, command):
|
def send_command(self, command):
|
||||||
|
|
||||||
@ -296,15 +261,22 @@ class Server:
|
|||||||
|
|
||||||
def crash_detected(self, name):
|
def crash_detected(self, name):
|
||||||
|
|
||||||
|
# clear the old scheduled watcher task
|
||||||
|
self.remove_watcher_thread()
|
||||||
|
|
||||||
# the server crashed, or isn't found - so let's reset things.
|
# the server crashed, or isn't found - so let's reset things.
|
||||||
logger.warning("The server {} seems to have vanished unexpectedly, did it crash?".format(name))
|
logger.warning("The server {} seems to have vanished unexpectedly, did it crash?".format(name))
|
||||||
|
|
||||||
if self.settings['crash_detection']:
|
if self.settings['crash_detection']:
|
||||||
logger.info("The server {} has crashed and will be restarted. Restarting server".format(name))
|
logger.warning("The server {} has crashed and will be restarted. Restarting server".format(name))
|
||||||
|
console.warning("The server {} has crashed and will be restarted. Restarting server".format(name))
|
||||||
self.run_threaded_server()
|
self.run_threaded_server()
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
logger.info("The server {} has crashed, crash detection is disabled and it will not be restarted".format(name))
|
logger.critical(
|
||||||
|
"The server {} has crashed, crash detection is disabled and it will not be restarted".format(name))
|
||||||
|
console.critical(
|
||||||
|
"The server {} has crashed, crash detection is disabled and it will not be restarted".format(name))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def killpid(self, pid):
|
def killpid(self, pid):
|
||||||
@ -326,4 +298,42 @@ class Server:
|
|||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def detect_crash(self):
|
||||||
|
|
||||||
|
logger.info("Detecting possible crash for server: {} ".format(self.name))
|
||||||
|
|
||||||
|
running = self.check_running()
|
||||||
|
|
||||||
|
# if all is okay, we just exit out
|
||||||
|
if running:
|
||||||
|
return
|
||||||
|
|
||||||
|
# if we haven't tried to restart more 3 or more times
|
||||||
|
if self.restart_count <= 3:
|
||||||
|
|
||||||
|
# start the server if needed
|
||||||
|
server_restarted = self.crash_detected(self.name)
|
||||||
|
|
||||||
|
if server_restarted:
|
||||||
|
# add to the restart count
|
||||||
|
self.restart_count = self.restart_count + 1
|
||||||
|
|
||||||
|
# we have tried to restart 4 times...
|
||||||
|
elif self.restart_count == 4:
|
||||||
|
logger.critical("Server {} has been restarted {} times. It has crashed, not restarting.".format(
|
||||||
|
self.name, self.restart_count))
|
||||||
|
|
||||||
|
console.critical("Server {} has been restarted {} times. It has crashed, not restarting.".format(
|
||||||
|
self.name, self.restart_count))
|
||||||
|
|
||||||
|
# set to 99 restart attempts so this elif is skipped next time. (no double logging)
|
||||||
|
self.restart_count = 99
|
||||||
|
self.is_crashed = True
|
||||||
|
|
||||||
|
# cancel the watcher task
|
||||||
|
self.remove_watcher_thread()
|
||||||
|
|
||||||
|
def remove_watcher_thread(self):
|
||||||
|
logger.info("Removing old crash detection watcher thread")
|
||||||
|
console.info("Removing old crash detection watcher thread")
|
||||||
|
schedule.clear(self.name)
|
||||||
|
Loading…
Reference in New Issue
Block a user