Mirror of https://gitlab.com/veloren/veloren.git

commit 5f902b5eab (parent 2b13b7d952)

Do a clean shutdown of the BParticipant once the TCP connection is suddenly interrupted.
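What the change does: the handles needed for teardown (the prios-flushed receiver and the shutdown senders for every manager task) are stored up front in a BParticipantShutdown struct behind a futures::lock::Mutex<Option<...>>, and close_participant consumes them with take(). That makes the close idempotent: it can be triggered by the regular shutdown manager or by a send error on a dead TCP connection, and whichever fires second sees None and backs off. A minimal sketch of the idiom, assuming illustrative names (ShutdownInfo, Participant) rather than the exact veloren types:

use futures::{channel::oneshot, lock::Mutex};

struct ShutdownInfo {
    // handles that must only be used by the first (and only) close
    mgr_to_shutdown: Vec<oneshot::Sender<()>>,
}

struct Participant {
    shutdown_info: Mutex<Option<ShutdownInfo>>,
}

impl Participant {
    async fn close_participant(&self) {
        // take() empties the Option; a second caller sees None and returns,
        // which makes the close idempotent under concurrent triggers.
        let mut info = match self.shutdown_info.lock().await.take() {
            Some(info) => info,
            None => return, // already closed once
        };
        for sender in info.mgr_to_shutdown.drain(..) {
            // the manager may already be gone; ignore the error here
            let _ = sender.send(());
        }
    }
}

The Mutex serializes concurrent closers, so the take() acts as an atomic claim on the teardown work.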
@@ -11,6 +11,7 @@ use async_std::sync::RwLock;
 use futures::{
     channel::{mpsc, oneshot},
     future::FutureExt,
+    lock::Mutex,
     select,
     sink::SinkExt,
     stream::StreamExt,
@@ -53,6 +54,12 @@ struct ControlChannels {
     s2b_shutdown_bparticipant_r: oneshot::Receiver<oneshot::Sender<async_std::io::Result<()>>>, /* own */
 }
 
+//this is needed in case of a shutdown
+struct BParticipantShutdown {
+    b2b_prios_flushed_r: oneshot::Receiver<()>,
+    mgr_to_shutdown: Vec<oneshot::Sender<()>>,
+}
+
 #[derive(Debug)]
 pub struct BParticipant {
     remote_pid: Pid,
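For context on mgr_to_shutdown: each manager task in BParticipant holds the receiving half of one of these oneshot channels and exits its loop once the signal arrives (the shutdown_*_mgr_receiver values visible in the hunks below). A rough sketch of that loop shape, with a hypothetical send_mgr and work queue standing in for the real manager code:

use futures::{
    channel::{mpsc, oneshot},
    select, FutureExt, StreamExt,
};

// Hypothetical manager loop (not the actual veloren code): it drains its
// work queue until either all senders are dropped or the shutdown fires.
async fn send_mgr(
    mut work_r: mpsc::UnboundedReceiver<String>,
    shutdown_mgr_r: oneshot::Receiver<()>,
) {
    let mut shutdown_mgr_r = shutdown_mgr_r.fuse();
    loop {
        select! {
            item = work_r.next() => match item {
                Some(item) => println!("sending {}", item),
                None => break, // work queue closed
            },
            _ = shutdown_mgr_r => break, // close_participant signalled us
        }
    }
}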
@@ -63,6 +70,7 @@ pub struct BParticipant {
     running_mgr: AtomicUsize,
     run_channels: Option<ControlChannels>,
     metrics: Arc<NetworkMetrics>,
+    shutdown_info: Mutex<Option<BParticipantShutdown>>,
     no_channel_error_info: RwLock<(Instant, u64)>,
 }
 
@@ -106,6 +114,7 @@ impl BParticipant {
                 run_channels,
                 metrics,
                 no_channel_error_info: RwLock::new((Instant::now(), 0)),
+                shutdown_info: Mutex::new(None),
             },
             a2b_steam_open_s,
             b2a_stream_opened_r,
@@ -125,6 +134,14 @@ impl BParticipant {
         let (w2b_frames_s, w2b_frames_r) = mpsc::unbounded::<(Cid, Frame)>();
         let (prios, a2p_msg_s, b2p_notify_empty_stream_s) =
             PrioManager::new(self.metrics.clone(), self.remote_pid_string.clone());
+        *self.shutdown_info.lock().await = Some(BParticipantShutdown {
+            b2b_prios_flushed_r,
+            mgr_to_shutdown: vec![
+                shutdown_send_mgr_sender,
+                shutdown_open_mgr_sender,
+                shutdown_stream_close_mgr_sender,
+            ],
+        });
 
         let run_channels = self.run_channels.take().unwrap();
         futures::join!(
@@ -152,15 +169,7 @@ impl BParticipant {
                 shutdown_stream_close_mgr_receiver,
                 b2p_notify_empty_stream_s,
             ),
-            self.participant_shutdown_mgr(
-                run_channels.s2b_shutdown_bparticipant_r,
-                b2b_prios_flushed_r,
-                vec!(
-                    shutdown_send_mgr_sender,
-                    shutdown_open_mgr_sender,
-                    shutdown_stream_close_mgr_sender
-                )
-            ),
+            self.participant_shutdown_mgr(run_channels.s2b_shutdown_bparticipant_r,),
         );
     }
 
@@ -248,6 +257,8 @@ impl BParticipant {
                 "FIXME: the frame is actually drop. which is fine for now as the participant \
                  will be closed, but not if we do channel-takeover"
             );
+            //TEMP FIX: as we dont have channel takeover yet drop the whole bParticipant
+            self.close_participant(2).await;
             false
         } else {
             true
@@ -380,10 +391,10 @@ impl BParticipant {
                     }
                 }
             },
-            Frame::Shutdown => error!(
-                "Somehow this Shutdown signal got here, i should probably handle it. To not \
-                 crash let me just put this message here"
-            ),
+            Frame::Shutdown => {
+                debug!("Shutdown received from remote side");
+                self.close_participant(2).await;
+            },
             f => unreachable!("never reaches frame!: {:?}", f),
         }
     }
@@ -502,47 +513,11 @@ impl BParticipant {
     async fn participant_shutdown_mgr(
         &self,
         s2b_shutdown_bparticipant_r: oneshot::Receiver<oneshot::Sender<async_std::io::Result<()>>>,
-        b2b_prios_flushed_r: oneshot::Receiver<()>,
-        mut to_shutdown: Vec<oneshot::Sender<()>>,
     ) {
         self.running_mgr.fetch_add(1, Ordering::Relaxed);
         trace!("start participant_shutdown_mgr");
         let sender = s2b_shutdown_bparticipant_r.await.unwrap();
-        debug!("closing all managers");
-        for sender in to_shutdown.drain(..) {
-            if let Err(e) = sender.send(()) {
-                warn!(?e, "manager seems to be closed already, weird, maybe a bug");
-            };
-        }
-        debug!("closing all streams");
-        for (sid, si) in self.streams.write().await.drain() {
-            trace!(?sid, "shutting down Stream");
-            si.closed.store(true, Ordering::Relaxed);
-        }
-        debug!("waiting for prios to be flushed");
-        b2b_prios_flushed_r.await.unwrap();
-        debug!("closing all channels");
-        for ci in self.channels.write().await.drain(..) {
-            if let Err(e) = ci.b2r_read_shutdown.send(()) {
-                debug!(?e, ?ci.cid, "seems like this read protocol got already dropped by closing the Stream itself, just ignoring the fact");
-            };
-        }
-        //Wait for other bparticipants mgr to close via AtomicUsize
-        const SLEEP_TIME: Duration = Duration::from_millis(5);
-        async_std::task::sleep(SLEEP_TIME).await;
-        let mut i: u32 = 1;
-        while self.running_mgr.load(Ordering::Relaxed) > 1 {
-            i += 1;
-            if i.rem_euclid(10) == 1 {
-                trace!(
-                    "waiting for bparticipant mgr to shut down, remaining {}",
-                    self.running_mgr.load(Ordering::Relaxed) - 1
-                );
-            }
-            async_std::task::sleep(SLEEP_TIME * i).await;
-        }
-        trace!("all bparticipant mgr (except me) are shut down now");
-        self.metrics.participants_disconnected_total.inc();
+        self.close_participant(1).await;
         sender.send(Ok(())).unwrap();
         trace!("stop participant_shutdown_mgr");
         self.running_mgr.fetch_sub(1, Ordering::Relaxed);
@@ -638,9 +613,57 @@ impl BParticipant {
         )
     }
 
-    /*
-    async fn close_participant(&self) {
+    /// this will gracefully shut down the bparticipant
+    /// allowed_managers: the number of open managers to sleep on. Must be 1 for
+    /// shutdown_mgr and 2 if it comes from a send error.
+    async fn close_participant(&self, allowed_managers: usize) {
+        trace!("participant shutdown triggered");
+        let mut info = match self.shutdown_info.lock().await.take() {
+            Some(info) => info,
+            None => {
+                error!(
+                    "Close of participant seemed to be called twice, that's bad, ignoring the 2nd \
+                     close"
+                );
+                return;
+            },
+        };
+        debug!("closing all managers");
+        for sender in info.mgr_to_shutdown.drain(..) {
+            if let Err(e) = sender.send(()) {
+                warn!(?e, "manager seems to be closed already, weird, maybe a bug");
+            };
+        }
+        debug!("closing all streams");
+        for (sid, si) in self.streams.write().await.drain() {
+            trace!(?sid, "shutting down Stream");
+            si.closed.store(true, Ordering::Relaxed);
+        }
+        debug!("waiting for prios to be flushed");
+        info.b2b_prios_flushed_r.await.unwrap();
+        debug!("closing all channels");
+        for ci in self.channels.write().await.drain(..) {
+            if let Err(e) = ci.b2r_read_shutdown.send(()) {
+                debug!(?e, ?ci.cid, "seems like this read protocol got already dropped by closing the Stream itself, just ignoring the fact");
+            };
+        }
+        //Wait for other bparticipants mgr to close via AtomicUsize
+        const SLEEP_TIME: Duration = Duration::from_millis(5);
+        async_std::task::sleep(SLEEP_TIME).await;
+        let mut i: u32 = 1;
+        while self.running_mgr.load(Ordering::Relaxed) > allowed_managers {
+            i += 1;
+            if i.rem_euclid(10) == 1 {
+                trace!(
+                    ?allowed_managers,
+                    "waiting for bparticipant mgr to shut down, remaining {}",
+                    self.running_mgr.load(Ordering::Relaxed) - allowed_managers
+                );
+            }
+            async_std::task::sleep(SLEEP_TIME * i).await;
+        }
+        trace!("all bparticipant mgr (except me) are shut down now");
+        self.metrics.participants_disconnected_total.inc();
+        debug!("bparticipant close done");
     }
-    */
 }
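The tail of close_participant waits for the remaining manager tasks by polling the shared running_mgr counter with a linearly growing sleep instead of blocking on a synchronization primitive; allowed_managers is the number of managers expected to still be alive (the caller itself, or the caller plus the send manager). A condensed, self-contained sketch of that backoff loop; wait_for_managers is an illustrative name, not a function in the commit:

use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Duration;

// Waits until at most `allowed_managers` manager tasks are still running,
// sleeping with a linearly growing interval so a slow shutdown does not
// busy-spin the executor.
async fn wait_for_managers(running_mgr: &AtomicUsize, allowed_managers: usize) {
    const SLEEP_TIME: Duration = Duration::from_millis(5);
    async_std::task::sleep(SLEEP_TIME).await;
    let mut i: u32 = 1;
    while running_mgr.load(Ordering::Relaxed) > allowed_managers {
        i += 1;
        async_std::task::sleep(SLEEP_TIME * i).await;
    }
}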