Implemented graceful shutdown on SIGUSR1 signal. Added shutdown <seconds> TUI command. Added abortshutdown TUI command. Fixed a bug in TUI that caused a panic on quit in basic mode on Windows.

This commit is contained in:
Ben Wallis 2020-10-03 20:10:34 +01:00
parent 8402f98261
commit 51459c0733
9 changed files with 275 additions and 22 deletions

1
Cargo.lock generated
View File

@ -4712,6 +4712,7 @@ dependencies = [
"clap", "clap",
"crossterm", "crossterm",
"lazy_static", "lazy_static",
"signal-hook",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"tracing-tracy", "tracing-tracy",

View File

@ -17,6 +17,7 @@ ansi-parser = "0.6"
clap = "2.33" clap = "2.33"
crossterm = "0.17" crossterm = "0.17"
lazy_static = "1" lazy_static = "1"
signal-hook = "0.1.16"
tracing = { version = "0.1", default-features = false } tracing = { version = "0.1", default-features = false }
tracing-subscriber = { version = "0.2.3", default-features = false, features = ["env-filter", "fmt", "chrono", "ansi", "smallvec"] } tracing-subscriber = { version = "0.2.3", default-features = false, features = ["env-filter", "fmt", "chrono", "ansi", "smallvec"] }

View File

@ -1,5 +1,8 @@
FROM debian:stable-slim FROM debian:stable-slim
# SIGUSR1 causes veloren-server-cli to initiate a graceful shutdown
LABEL com.centurylinklabs.watchtower.stop-signal="SIGUSR1"
ARG PROJECTNAME=server-cli ARG PROJECTNAME=server-cli
# librust-backtrace+libbacktrace-dev = backtrace functionality # librust-backtrace+libbacktrace-dev = backtrace functionality

View File

@ -16,4 +16,4 @@ services:
image: containrrr/watchtower image: containrrr/watchtower
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
command: --interval 30 --cleanup veloren-game-server-master command: --interval 30 --stop-timeout 130s --cleanup veloren-game-server-master

View File

@ -1,24 +1,32 @@
#![deny(unsafe_code)] #![deny(unsafe_code)]
#![deny(clippy::clone_on_ref_ptr)] #![deny(clippy::clone_on_ref_ptr)]
mod shutdown_coordinator;
mod tui_runner; mod tui_runner;
mod tuilog; mod tuilog;
#[macro_use] extern crate lazy_static; #[macro_use] extern crate lazy_static;
use crate::{ use crate::{
shutdown_coordinator::ShutdownCoordinator,
tui_runner::{Message, Tui}, tui_runner::{Message, Tui},
tuilog::TuiLog, tuilog::TuiLog,
}; };
use common::clock::Clock; use common::clock::Clock;
use server::{Event, Input, Server, ServerSettings}; use server::{Event, Input, Server, ServerSettings};
#[cfg(any(target_os = "linux", target_os = "macos"))]
use signal_hook::SIGUSR1;
use tracing::{info, Level}; use tracing::{info, Level};
use tracing_subscriber::{filter::LevelFilter, EnvFilter, FmtSubscriber}; use tracing_subscriber::{filter::LevelFilter, EnvFilter, FmtSubscriber};
#[cfg(feature = "tracy")] #[cfg(feature = "tracy")]
use tracing_subscriber::{layer::SubscriberExt, prelude::*}; use tracing_subscriber::{layer::SubscriberExt, prelude::*};
use clap::{App, Arg}; use clap::{App, Arg};
use std::{io, sync::mpsc, time::Duration}; use std::{
io,
sync::{atomic::AtomicBool, mpsc, Arc},
time::Duration,
};
const TPS: u64 = 30; const TPS: u64 = 30;
const RUST_LOG_ENV: &str = "RUST_LOG"; const RUST_LOG_ENV: &str = "RUST_LOG";
@ -42,6 +50,12 @@ fn main() -> io::Result<()> {
.get_matches(); .get_matches();
let basic = matches.is_present("basic"); let basic = matches.is_present("basic");
let sigusr1_signal = Arc::new(AtomicBool::new(false));
#[cfg(any(target_os = "linux", target_os = "macos"))]
let _ = signal_hook::flag::register(SIGUSR1, Arc::clone(&sigusr1_signal));
let (mut tui, msg_r) = Tui::new(); let (mut tui, msg_r) = Tui::new();
// Init logging // Init logging
@ -89,6 +103,14 @@ fn main() -> io::Result<()> {
} }
} }
// Panic hook to ensure that console mode is set back correctly if in non-basic
// mode
let hook = std::panic::take_hook();
std::panic::set_hook(Box::new(move |info| {
Tui::shutdown(basic);
hook(info);
}));
tui.run(basic); tui.run(basic);
info!("Starting server..."); info!("Starting server...");
@ -103,11 +125,20 @@ fn main() -> io::Result<()> {
// Create server // Create server
let mut server = Server::new(settings).expect("Failed to create server instance!"); let mut server = Server::new(settings).expect("Failed to create server instance!");
info!("Server is ready to accept connections."); info!(
info!(?metrics_port, "starting metrics at port"); ?server_port,
info!(?server_port, "starting server at port"); ?metrics_port,
"Server is ready to accept connections."
);
let mut shutdown_coordinator = ShutdownCoordinator::new(Arc::clone(&sigusr1_signal));
loop { loop {
// Terminate the server if instructed to do so by the shutdown coordinator
if shutdown_coordinator.check(&mut server) {
break;
}
let events = server let events = server
.tick(Input::default(), clock.get_last_delta()) .tick(Input::default(), clock.get_last_delta())
.expect("Failed to tick server"); .expect("Failed to tick server");
@ -127,6 +158,13 @@ fn main() -> io::Result<()> {
match msg_r.try_recv() { match msg_r.try_recv() {
Ok(msg) => match msg { Ok(msg) => match msg {
Message::AbortShutdown => shutdown_coordinator.abort_shutdown(&mut server),
Message::Shutdown { grace_period } => {
// TODO: The TUI parser doesn't support quoted strings so it is not currently
// possible to provide a shutdown reason from the console.
let message = "The server is shutting down".to_owned();
shutdown_coordinator.initiate_shutdown(&mut server, grace_period, message);
},
Message::Quit => { Message::Quit => {
info!("Closing the server"); info!("Closing the server");
break; break;

View File

@ -0,0 +1,176 @@
use common::comp::chat::ChatType;
use server::Server;
use std::{
ops::Add,
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
time::{Duration, Instant},
};
use tracing::{error, info};
/// Drives the server's graceful shutdown procedure.
///
/// A shutdown can be requested either from the TUI console or by delivering
/// the SIGUSR1 signal to the process, the latter indicating that the server is
/// being restarted for an update.
pub(crate) struct ShutdownCoordinator {
    /// When the most recent shutdown warning was broadcast; the next warning
    /// is scheduled relative to this instant.
    last_shutdown_msg: Instant,
    /// How long to wait between consecutive shutdown warnings.
    msg_interval: Duration,
    /// When the current shutdown was initiated, or `None` while no shutdown
    /// is in progress.
    shutdown_initiated_at: Option<Instant>,
    /// How long to wait after initiation before the server actually shuts
    /// down.
    shutdown_grace_period: Duration,
    /// Text used as the prefix of the warning broadcast to all connected
    /// players.
    shutdown_message: String,
    /// Flag supplied by `signal_hook` through which receipt of SIGUSR1 is
    /// observed.
    sigusr1_signal: Arc<AtomicBool>,
}
impl ShutdownCoordinator {
pub fn new(sigusr1_signal: Arc<AtomicBool>) -> Self {
Self {
last_shutdown_msg: Instant::now(),
msg_interval: Duration::from_secs(30),
shutdown_initiated_at: None,
shutdown_grace_period: Duration::from_secs(0),
shutdown_message: String::new(),
sigusr1_signal,
}
}
/// Initiates a graceful shutdown of the server using the specified grace
/// period and message. When the grace period expires, the server
/// process exits.
pub fn initiate_shutdown(
&mut self,
server: &mut Server,
grace_period: Duration,
message: String,
) {
if self.shutdown_initiated_at.is_none() {
self.shutdown_grace_period = grace_period;
self.shutdown_initiated_at = Some(Instant::now());
self.shutdown_message = message;
// Send an initial shutdown warning message to all connected clients
self.send_shutdown_msg(server);
} else {
error!("Shutdown already in progress")
}
}
/// Aborts an in-progress shutdown and sends a message to all connected
/// clients.
pub fn abort_shutdown(&mut self, server: &mut Server) {
if self.shutdown_initiated_at.is_some() {
self.shutdown_initiated_at = None;
ShutdownCoordinator::send_msg(server, "The shutdown has been aborted".to_owned());
} else {
error!("There is no shutdown in progress");
}
}
/// Called once per tick to process any pending actions related to server
/// shutdown. If the grace period for an initiated shutdown has expired,
/// returns `true` which triggers the loop in `main.rs` to break and
/// exit the server process.
pub fn check(&mut self, server: &mut Server) -> bool {
// Check whether SIGUSR1 has been set
self.check_sigusr1_signal(server);
// If a shutdown is in progress, check whether it's time to send another warning
// message or shut down if the grace period has expired.
if let Some(shutdown_initiated_at) = self.shutdown_initiated_at {
if Instant::now() > shutdown_initiated_at.add(self.shutdown_grace_period) {
info!("Shutting down");
return true;
}
// In the last 10 seconds start sending messages every 1 second
if let Some(time_until_shutdown) = self.time_until_shutdown() {
if time_until_shutdown <= Duration::from_secs(10) {
self.msg_interval = Duration::from_secs(1);
}
}
// Send another shutdown warning message to all connected clients if
// msg_interval has expired
if self.last_shutdown_msg + self.msg_interval <= Instant::now() {
self.send_shutdown_msg(server);
}
}
false
}
/// Checks whether the SIGUSR1 signal has been set, which is used to trigger
/// a graceful shutdown for an update. [Watchtower](https://containrrr.dev/watchtower/) is configured on the main
/// Veloren server to send SIGUSR1 instead of SIGTERM which allows us to
/// react specifically to shutdowns that are for an update.
/// NOTE: SIGUSR1 is not supported on Windows
fn check_sigusr1_signal(&mut self, server: &mut Server) {
if self.sigusr1_signal.load(Ordering::Relaxed) && self.shutdown_initiated_at.is_none() {
info!("Received SIGUSR1 signal, initiating graceful shutdown");
let grace_period =
Duration::from_secs(server.settings().update_shutdown_grace_period_secs);
let shutdown_message = server.settings().update_shutdown_message.to_owned();
self.initiate_shutdown(server, grace_period, shutdown_message);
// Reset the SIGUSR1 signal indicator in case shutdown is aborted and we need to
// trigger shutdown again
self.sigusr1_signal.store(false, Ordering::Relaxed);
}
}
/// Constructs a formatted shutdown message and sends it to all connected
/// clients
fn send_shutdown_msg(&mut self, server: &mut Server) {
if let Some(time_until_shutdown) = self.time_until_shutdown() {
let msg = format!(
"{} in {}",
self.shutdown_message,
ShutdownCoordinator::duration_to_text(time_until_shutdown)
);
ShutdownCoordinator::send_msg(server, msg);
self.last_shutdown_msg = Instant::now();
}
}
/// Calculates the remaining time before the shutdown grace period expires
fn time_until_shutdown(&self) -> Option<Duration> {
let shutdown_initiated_at = self.shutdown_initiated_at?;
let shutdown_time = shutdown_initiated_at + self.shutdown_grace_period;
// If we're somehow trying to calculate the time until shutdown after the
// shutdown time Instant::checked_duration_since will return None as
// negative durations are not supported.
shutdown_time.checked_duration_since(Instant::now())
}
/// Logs and sends a message to all connected clients
fn send_msg(server: &mut Server, msg: String) {
info!("{}", &msg);
server.notify_registered_clients(ChatType::CommandError.server_msg(msg));
}
/// Converts a `Duration` into text in the format XsXm for example 1 minute
/// 50 seconds would be converted to "1m50s", 2 minutes 0 seconds to
/// "2m" and 0 minutes 23 seconds to "23s".
fn duration_to_text(duration: Duration) -> String {
let secs = duration.as_secs_f32().round() as i32 % 60;
let mins = duration.as_secs_f32().round() as i32 / 60;
let mut text = String::new();
if mins > 0 {
text.push_str(format!("{}m", mins).as_str())
}
if secs > 0 {
text.push_str(format!("{}s", secs).as_str())
}
text
}
}

View File

@ -23,6 +23,8 @@ use tui::{
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Message { pub enum Message {
AbortShutdown,
Shutdown { grace_period: Duration },
Quit, Quit,
} }
@ -35,7 +37,7 @@ pub struct Command<'a> {
pub cmd: fn(Vec<String>, &mut mpsc::Sender<Message>), pub cmd: fn(Vec<String>, &mut mpsc::Sender<Message>),
} }
pub const COMMANDS: [Command; 2] = [ pub const COMMANDS: [Command; 4] = [
Command { Command {
name: "quit", name: "quit",
description: "Closes the server", description: "Closes the server",
@ -43,6 +45,31 @@ pub const COMMANDS: [Command; 2] = [
args: 0, args: 0,
cmd: |_, sender| sender.send(Message::Quit).unwrap(), cmd: |_, sender| sender.send(Message::Quit).unwrap(),
}, },
Command {
name: "shutdown",
description: "Initiates a graceful shutdown of the server, waiting the specified number \
of seconds before shutting down",
split_spaces: true,
args: 1,
cmd: |args, sender| {
if let Ok(grace_period) = args.first().unwrap().parse::<u64>() {
sender
.send(Message::Shutdown {
grace_period: Duration::from_secs(grace_period),
})
.unwrap()
} else {
error!("Grace period must an integer")
}
},
},
Command {
name: "abortshutdown",
description: "Aborts a shutdown if one is in progress",
split_spaces: false,
args: 0,
cmd: |_, sender| sender.send(Message::AbortShutdown).unwrap(),
},
Command { Command {
name: "help", name: "help",
description: "List all command available", description: "List all command available",
@ -59,8 +86,9 @@ pub const COMMANDS: [Command; 2] = [
]; ];
pub struct Tui { pub struct Tui {
msg_s: Option<mpsc::Sender<Message>>,
background: Option<std::thread::JoinHandle<()>>, background: Option<std::thread::JoinHandle<()>>,
basic: bool,
msg_s: Option<mpsc::Sender<Message>>,
running: Arc<AtomicBool>, running: Arc<AtomicBool>,
} }
@ -69,8 +97,9 @@ impl Tui {
let (msg_s, msg_r) = mpsc::channel(); let (msg_s, msg_r) = mpsc::channel();
( (
Self { Self {
msg_s: Some(msg_s),
background: None, background: None,
basic: false,
msg_s: Some(msg_s),
running: Arc::new(AtomicBool::new(true)), running: Arc::new(AtomicBool::new(true)),
}, },
msg_r, msg_r,
@ -104,16 +133,12 @@ impl Tui {
} }
pub fn run(&mut self, basic: bool) { pub fn run(&mut self, basic: bool) {
let hook = std::panic::take_hook(); self.basic = basic;
std::panic::set_hook(Box::new(move |info| {
Self::shutdown();
hook(info);
}));
let mut msg_s = self.msg_s.take().unwrap(); let mut msg_s = self.msg_s.take().unwrap();
let running = Arc::clone(&self.running); let running = Arc::clone(&self.running);
if basic { if self.basic {
std::thread::spawn(move || { std::thread::spawn(move || {
while running.load(Ordering::Relaxed) { while running.load(Ordering::Relaxed) {
let mut line = String::new(); let mut line = String::new();
@ -152,7 +177,7 @@ impl Tui {
let mut input = String::new(); let mut input = String::new();
if let Err(e) = terminal.clear() { if let Err(e) = terminal.clear() {
error!(?e, "clouldn't clean terminal"); error!(?e, "couldn't clean terminal");
}; };
while running.load(Ordering::Relaxed) { while running.load(Ordering::Relaxed) {
@ -206,11 +231,12 @@ impl Tui {
} }
} }
fn shutdown() { pub fn shutdown(basic: bool) {
let mut stdout = io::stdout(); if !basic {
let mut stdout = io::stdout();
execute!(stdout, LeaveAlternateScreen, DisableMouseCapture).unwrap(); execute!(stdout, LeaveAlternateScreen, DisableMouseCapture).unwrap();
disable_raw_mode().unwrap(); disable_raw_mode().unwrap();
}
} }
} }
@ -218,7 +244,7 @@ impl Drop for Tui {
fn drop(&mut self) { fn drop(&mut self) {
self.running.store(false, Ordering::Relaxed); self.running.store(false, Ordering::Relaxed);
self.background.take().map(|m| m.join()); self.background.take().map(|m| m.join());
Self::shutdown(); Tui::shutdown(self.basic);
} }
} }
@ -237,7 +263,7 @@ fn parse_command(input: &str, msg_s: &mut mpsc::Sender<Message>) {
.collect::<Vec<String>>(), .collect::<Vec<String>>(),
) )
} else { } else {
(1, vec![args.into_iter().collect::<String>()]) (0, vec![args.into_iter().collect::<String>()])
}; };
match arg_len.cmp(&cmd.args) { match arg_len.cmp(&cmd.args) {

View File

@ -846,6 +846,10 @@ impl Server {
} }
} }
pub fn notify_registered_clients(&mut self, msg: ServerMsg) {
self.state.notify_registered_clients(msg);
}
pub fn generate_chunk(&mut self, entity: EcsEntity, key: Vec2<i32>) { pub fn generate_chunk(&mut self, entity: EcsEntity, key: Vec2<i32>) {
self.state self.state
.ecs() .ecs()

View File

@ -31,6 +31,8 @@ pub struct ServerSettings {
pub banned_words_files: Vec<PathBuf>, pub banned_words_files: Vec<PathBuf>,
pub max_player_group_size: u32, pub max_player_group_size: u32,
pub client_timeout: Duration, pub client_timeout: Duration,
pub update_shutdown_grace_period_secs: u64,
pub update_shutdown_message: String,
} }
impl Default for ServerSettings { impl Default for ServerSettings {
@ -53,6 +55,8 @@ impl Default for ServerSettings {
banned_words_files: Vec::new(), banned_words_files: Vec::new(),
max_player_group_size: 6, max_player_group_size: 6,
client_timeout: Duration::from_secs(40), client_timeout: Duration::from_secs(40),
update_shutdown_grace_period_secs: 120,
update_shutdown_message: "The server is restarting for an update".to_owned(),
} }
} }
} }