From 51459c0733509df3b75cb41b57c4c3b25e0737d1 Mon Sep 17 00:00:00 2001 From: Ben Wallis Date: Sat, 3 Oct 2020 20:10:34 +0100 Subject: [PATCH] Implemented graceful shutdown on SIGUSR1 signal. Added shutdown TUI command. Added abortshutdown TUI command. Fixed a bug in TUI that caused a panic on quit in basic mode on windows. --- Cargo.lock | 1 + server-cli/Cargo.toml | 1 + server-cli/Dockerfile | 3 + server-cli/docker-compose.yml | 2 +- server-cli/src/main.rs | 46 ++++++- server-cli/src/shutdown_coordinator.rs | 176 +++++++++++++++++++++++++ server-cli/src/tui_runner.rs | 60 ++++++--- server/src/lib.rs | 4 + server/src/settings.rs | 4 + 9 files changed, 275 insertions(+), 22 deletions(-) create mode 100644 server-cli/src/shutdown_coordinator.rs diff --git a/Cargo.lock b/Cargo.lock index 2a3882eec8..e45c6f8c12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4712,6 +4712,7 @@ dependencies = [ "clap", "crossterm", "lazy_static", + "signal-hook", "tracing", "tracing-subscriber", "tracing-tracy", diff --git a/server-cli/Cargo.toml b/server-cli/Cargo.toml index 4065998b8d..236c23f79f 100644 --- a/server-cli/Cargo.toml +++ b/server-cli/Cargo.toml @@ -17,6 +17,7 @@ ansi-parser = "0.6" clap = "2.33" crossterm = "0.17" lazy_static = "1" +signal-hook = "0.1.16" tracing = { version = "0.1", default-features = false } tracing-subscriber = { version = "0.2.3", default-features = false, features = ["env-filter", "fmt", "chrono", "ansi", "smallvec"] } diff --git a/server-cli/Dockerfile b/server-cli/Dockerfile index 3ed1434712..166f668556 100644 --- a/server-cli/Dockerfile +++ b/server-cli/Dockerfile @@ -1,5 +1,8 @@ FROM debian:stable-slim +# SIGUSR1 causes veloren-server-cli to initiate a graceful shutdown +LABEL com.centurylinklabs.watchtower.stop-signal="SIGUSR1" + ARG PROJECTNAME=server-cli # librust-backtrace+libbacktrace-dev = backtrace functionality diff --git a/server-cli/docker-compose.yml b/server-cli/docker-compose.yml index 8c81a5c7ef..355ea0f304 100644 --- 
a/server-cli/docker-compose.yml +++ b/server-cli/docker-compose.yml @@ -16,4 +16,4 @@ services: image: containrrr/watchtower volumes: - /var/run/docker.sock:/var/run/docker.sock - command: --interval 30 --cleanup veloren-game-server-master + command: --interval 30 --stop-timeout 130s --cleanup veloren-game-server-master diff --git a/server-cli/src/main.rs b/server-cli/src/main.rs index 801ac8566d..4d2d6fb8b6 100644 --- a/server-cli/src/main.rs +++ b/server-cli/src/main.rs @@ -1,24 +1,32 @@ #![deny(unsafe_code)] #![deny(clippy::clone_on_ref_ptr)] +mod shutdown_coordinator; mod tui_runner; mod tuilog; #[macro_use] extern crate lazy_static; use crate::{ + shutdown_coordinator::ShutdownCoordinator, tui_runner::{Message, Tui}, tuilog::TuiLog, }; use common::clock::Clock; use server::{Event, Input, Server, ServerSettings}; +#[cfg(any(target_os = "linux", target_os = "macos"))] +use signal_hook::SIGUSR1; use tracing::{info, Level}; use tracing_subscriber::{filter::LevelFilter, EnvFilter, FmtSubscriber}; #[cfg(feature = "tracy")] use tracing_subscriber::{layer::SubscriberExt, prelude::*}; use clap::{App, Arg}; -use std::{io, sync::mpsc, time::Duration}; +use std::{ + io, + sync::{atomic::AtomicBool, mpsc, Arc}, + time::Duration, +}; const TPS: u64 = 30; const RUST_LOG_ENV: &str = "RUST_LOG"; @@ -42,6 +50,12 @@ fn main() -> io::Result<()> { .get_matches(); let basic = matches.is_present("basic"); + + let sigusr1_signal = Arc::new(AtomicBool::new(false)); + + #[cfg(any(target_os = "linux", target_os = "macos"))] + let _ = signal_hook::flag::register(SIGUSR1, Arc::clone(&sigusr1_signal)); + let (mut tui, msg_r) = Tui::new(); // Init logging @@ -89,6 +103,14 @@ fn main() -> io::Result<()> { } } + // Panic hook to ensure that console mode is set back correctly if in non-basic + // mode + let hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |info| { + Tui::shutdown(basic); + hook(info); + })); + tui.run(basic); info!("Starting server..."); @@ -103,11 +125,20 
@@ fn main() -> io::Result<()> { // Create server let mut server = Server::new(settings).expect("Failed to create server instance!"); - info!("Server is ready to accept connections."); - info!(?metrics_port, "starting metrics at port"); - info!(?server_port, "starting server at port"); + info!( + ?server_port, + ?metrics_port, + "Server is ready to accept connections." + ); + + let mut shutdown_coordinator = ShutdownCoordinator::new(Arc::clone(&sigusr1_signal)); loop { + // Terminate the server if instructed to do so by the shutdown coordinator + if shutdown_coordinator.check(&mut server) { + break; + } + let events = server .tick(Input::default(), clock.get_last_delta()) .expect("Failed to tick server"); @@ -127,6 +158,13 @@ fn main() -> io::Result<()> { match msg_r.try_recv() { Ok(msg) => match msg { + Message::AbortShutdown => shutdown_coordinator.abort_shutdown(&mut server), + Message::Shutdown { grace_period } => { + // TODO: The TUI parser doesn't support quoted strings so it is not currently + // possible to provide a shutdown reason from the console. + let message = "The server is shutting down".to_owned(); + shutdown_coordinator.initiate_shutdown(&mut server, grace_period, message); + }, Message::Quit => { info!("Closing the server"); break; diff --git a/server-cli/src/shutdown_coordinator.rs b/server-cli/src/shutdown_coordinator.rs new file mode 100644 index 0000000000..1716ea1f04 --- /dev/null +++ b/server-cli/src/shutdown_coordinator.rs @@ -0,0 +1,176 @@ +use common::comp::chat::ChatType; +use server::Server; +use std::{ + ops::Add, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; +use tracing::{error, info}; + +/// Coordinates the shutdown procedure for the server, which can be initiated by +/// either the TUI console interface or by sending the server the SIGUSR1 signal +/// which indicates the server is restarting due to an update. 
+pub(crate) struct ShutdownCoordinator { + /// The instant that the last shutdown message was sent, used for + /// calculating when to send the next shutdown message + last_shutdown_msg: Instant, + /// The interval that shutdown warning messages are sent at + msg_interval: Duration, + /// The instant that shutdown was initiated at + shutdown_initiated_at: Option, + /// The period to wait before shutting down after shutdown is initiated + shutdown_grace_period: Duration, + /// The message to use for the shutdown warning message that is sent to all + /// connected players + shutdown_message: String, + /// Provided by `signal_hook` to allow observation of the SIGUSR1 signal + sigusr1_signal: Arc, +} + +impl ShutdownCoordinator { + pub fn new(sigusr1_signal: Arc) -> Self { + Self { + last_shutdown_msg: Instant::now(), + msg_interval: Duration::from_secs(30), + shutdown_initiated_at: None, + shutdown_grace_period: Duration::from_secs(0), + shutdown_message: String::new(), + sigusr1_signal, + } + } + + /// Initiates a graceful shutdown of the server using the specified grace + /// period and message. When the grace period expires, the server + /// process exits. + pub fn initiate_shutdown( + &mut self, + server: &mut Server, + grace_period: Duration, + message: String, + ) { + if self.shutdown_initiated_at.is_none() { + self.shutdown_grace_period = grace_period; + self.shutdown_initiated_at = Some(Instant::now()); + self.shutdown_message = message; + + // Send an initial shutdown warning message to all connected clients + self.send_shutdown_msg(server); + } else { + error!("Shutdown already in progress") + } + } + + /// Aborts an in-progress shutdown and sends a message to all connected + /// clients. 
+ pub fn abort_shutdown(&mut self, server: &mut Server) { + if self.shutdown_initiated_at.is_some() { + self.shutdown_initiated_at = None; + ShutdownCoordinator::send_msg(server, "The shutdown has been aborted".to_owned()); + } else { + error!("There is no shutdown in progress"); + } + } + + /// Called once per tick to process any pending actions related to server + /// shutdown. If the grace period for an initiated shutdown has expired, + /// returns `true` which triggers the loop in `main.rs` to break and + /// exit the server process. + pub fn check(&mut self, server: &mut Server) -> bool { + // Check whether SIGUSR1 has been set + self.check_sigusr1_signal(server); + + // If a shutdown is in progress, check whether it's time to send another warning + // message or shut down if the grace period has expired. + if let Some(shutdown_initiated_at) = self.shutdown_initiated_at { + if Instant::now() > shutdown_initiated_at.add(self.shutdown_grace_period) { + info!("Shutting down"); + return true; + } + + // In the last 10 seconds start sending messages every 1 second + if let Some(time_until_shutdown) = self.time_until_shutdown() { + if time_until_shutdown <= Duration::from_secs(10) { + self.msg_interval = Duration::from_secs(1); + } + } + + // Send another shutdown warning message to all connected clients if + // msg_interval has expired + if self.last_shutdown_msg + self.msg_interval <= Instant::now() { + self.send_shutdown_msg(server); + } + } + + false + } + + /// Checks whether the SIGUSR1 signal has been set, which is used to trigger + /// a graceful shutdown for an update. [Watchtower](https://containrrr.dev/watchtower/) is configured on the main + /// Veloren server to send SIGUSR1 instead of SIGTERM which allows us to + /// react specifically to shutdowns that are for an update. 
+ /// NOTE: SIGUSR1 is not supported on Windows + fn check_sigusr1_signal(&mut self, server: &mut Server) { + if self.sigusr1_signal.load(Ordering::Relaxed) && self.shutdown_initiated_at.is_none() { + info!("Received SIGUSR1 signal, initiating graceful shutdown"); + let grace_period = + Duration::from_secs(server.settings().update_shutdown_grace_period_secs); + let shutdown_message = server.settings().update_shutdown_message.to_owned(); + self.initiate_shutdown(server, grace_period, shutdown_message); + + // Reset the SIGUSR1 signal indicator in case shutdown is aborted and we need to + // trigger shutdown again + self.sigusr1_signal.store(false, Ordering::Relaxed); + } + } + + /// Constructs a formatted shutdown message and sends it to all connected + /// clients + fn send_shutdown_msg(&mut self, server: &mut Server) { + if let Some(time_until_shutdown) = self.time_until_shutdown() { + let msg = format!( + "{} in {}", + self.shutdown_message, + ShutdownCoordinator::duration_to_text(time_until_shutdown) + ); + ShutdownCoordinator::send_msg(server, msg); + self.last_shutdown_msg = Instant::now(); + } + } + + /// Calculates the remaining time before the shutdown grace period expires + fn time_until_shutdown(&self) -> Option { + let shutdown_initiated_at = self.shutdown_initiated_at?; + let shutdown_time = shutdown_initiated_at + self.shutdown_grace_period; + + // If we're somehow trying to calculate the time until shutdown after the + // shutdown time Instant::checked_duration_since will return None as + // negative durations are not supported. 
+ shutdown_time.checked_duration_since(Instant::now()) + } + + /// Logs and sends a message to all connected clients + fn send_msg(server: &mut Server, msg: String) { + info!("{}", &msg); + server.notify_registered_clients(ChatType::CommandError.server_msg(msg)); + } + + /// Converts a `Duration` into text in the format XmXs for example 1 minute + /// 50 seconds would be converted to "1m50s", 2 minutes 0 seconds to + /// "2m" and 0 minutes 23 seconds to "23s". + fn duration_to_text(duration: Duration) -> String { + let secs = duration.as_secs_f32().round() as i32 % 60; + let mins = duration.as_secs_f32().round() as i32 / 60; + + let mut text = String::new(); + if mins > 0 { + text.push_str(format!("{}m", mins).as_str()) + } + if secs > 0 { + text.push_str(format!("{}s", secs).as_str()) + } + text + } +} diff --git a/server-cli/src/tui_runner.rs b/server-cli/src/tui_runner.rs index 8a30367087..e5c3d127e3 100644 --- a/server-cli/src/tui_runner.rs +++ b/server-cli/src/tui_runner.rs @@ -23,6 +23,8 @@ use tui::{ #[derive(Debug, Clone)] pub enum Message { + AbortShutdown, + Shutdown { grace_period: Duration }, Quit, } @@ -35,7 +37,7 @@ pub struct Command<'a> { pub cmd: fn(Vec, &mut mpsc::Sender), } -pub const COMMANDS: [Command; 2] = [ +pub const COMMANDS: [Command; 4] = [ Command { name: "quit", description: "Closes the server", @@ -43,6 +45,31 @@ pub const COMMANDS: [Command; 2] = [ args: 0, cmd: |_, sender| sender.send(Message::Quit).unwrap(), }, + Command { + name: "shutdown", + description: "Initiates a graceful shutdown of the server, waiting the specified number \ + of seconds before shutting down", + split_spaces: true, + args: 1, + cmd: |args, sender| { + if let Ok(grace_period) = args.first().unwrap().parse::() { + sender + .send(Message::Shutdown { + grace_period: Duration::from_secs(grace_period), + }) + .unwrap() + } else { + error!("Grace period must an integer") + } + }, + }, + Command { + name: "abortshutdown", + description: "Aborts a shutdown if one is 
in progress", + split_spaces: false, + args: 0, + cmd: |_, sender| sender.send(Message::AbortShutdown).unwrap(), + }, Command { name: "help", description: "List all command available", @@ -59,8 +86,9 @@ pub const COMMANDS: [Command; 2] = [ ]; pub struct Tui { - msg_s: Option>, background: Option>, + basic: bool, + msg_s: Option>, running: Arc, } @@ -69,8 +97,9 @@ impl Tui { let (msg_s, msg_r) = mpsc::channel(); ( Self { - msg_s: Some(msg_s), background: None, + basic: false, + msg_s: Some(msg_s), running: Arc::new(AtomicBool::new(true)), }, msg_r, @@ -104,16 +133,12 @@ impl Tui { } pub fn run(&mut self, basic: bool) { - let hook = std::panic::take_hook(); - std::panic::set_hook(Box::new(move |info| { - Self::shutdown(); - hook(info); - })); + self.basic = basic; let mut msg_s = self.msg_s.take().unwrap(); let running = Arc::clone(&self.running); - if basic { + if self.basic { std::thread::spawn(move || { while running.load(Ordering::Relaxed) { let mut line = String::new(); @@ -152,7 +177,7 @@ impl Tui { let mut input = String::new(); if let Err(e) = terminal.clear() { - error!(?e, "clouldn't clean terminal"); + error!(?e, "couldn't clean terminal"); }; while running.load(Ordering::Relaxed) { @@ -206,11 +231,12 @@ impl Tui { } } - fn shutdown() { - let mut stdout = io::stdout(); - - execute!(stdout, LeaveAlternateScreen, DisableMouseCapture).unwrap(); - disable_raw_mode().unwrap(); + pub fn shutdown(basic: bool) { + if !basic { + let mut stdout = io::stdout(); + execute!(stdout, LeaveAlternateScreen, DisableMouseCapture).unwrap(); + disable_raw_mode().unwrap(); + } } } @@ -218,7 +244,7 @@ impl Drop for Tui { fn drop(&mut self) { self.running.store(false, Ordering::Relaxed); self.background.take().map(|m| m.join()); - Self::shutdown(); + Tui::shutdown(self.basic); } } @@ -237,7 +263,7 @@ fn parse_command(input: &str, msg_s: &mut mpsc::Sender) { .collect::>(), ) } else { - (1, vec![args.into_iter().collect::()]) + (0, vec![args.into_iter().collect::()]) }; match 
arg_len.cmp(&cmd.args) { diff --git a/server/src/lib.rs b/server/src/lib.rs index 6d10d3efcb..3ed6a917dd 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -846,6 +846,10 @@ impl Server { } } + pub fn notify_registered_clients(&mut self, msg: ServerMsg) { + self.state.notify_registered_clients(msg); + } + pub fn generate_chunk(&mut self, entity: EcsEntity, key: Vec2) { self.state .ecs() diff --git a/server/src/settings.rs b/server/src/settings.rs index 28ce571c95..6f32d83d51 100644 --- a/server/src/settings.rs +++ b/server/src/settings.rs @@ -31,6 +31,8 @@ pub struct ServerSettings { pub banned_words_files: Vec, pub max_player_group_size: u32, pub client_timeout: Duration, + pub update_shutdown_grace_period_secs: u64, + pub update_shutdown_message: String, } impl Default for ServerSettings { @@ -53,6 +55,8 @@ impl Default for ServerSettings { banned_words_files: Vec::new(), max_player_group_size: 6, client_timeout: Duration::from_secs(40), + update_shutdown_grace_period_secs: 120, + update_shutdown_message: "The server is restarting for an update".to_owned(), } } }