Perform a clean shutdown of the BParticipant when the TCP connection is unexpectedly interrupted

This commit is contained in:
Marcel Märtens 2020-07-05 22:51:07 +02:00
parent 2b13b7d952
commit 5f902b5eab

View File

@ -11,6 +11,7 @@ use async_std::sync::RwLock;
use futures::{
channel::{mpsc, oneshot},
future::FutureExt,
lock::Mutex,
select,
sink::SinkExt,
stream::StreamExt,
@ -53,6 +54,12 @@ struct ControlChannels {
s2b_shutdown_bparticipant_r: oneshot::Receiver<oneshot::Sender<async_std::io::Result<()>>>, /* own */
}
// This is only needed in case of a shutdown: it bundles everything the
// shutdown path requires that is otherwise created inside `run()`.
// Stored in `BParticipant::shutdown_info` behind a `Mutex<Option<_>>` and
// taken (`Option::take`) exactly once by `close_participant`, which makes
// a second close attempt a detectable no-op.
struct BParticipantShutdown {
// Signalled by the prio manager once all queued messages are flushed.
b2b_prios_flushed_r: oneshot::Receiver<()>,
// One shutdown trigger per manager task (send / open / stream_close mgr).
mgr_to_shutdown: Vec<oneshot::Sender<()>>,
}
#[derive(Debug)]
pub struct BParticipant {
remote_pid: Pid,
@ -63,6 +70,7 @@ pub struct BParticipant {
running_mgr: AtomicUsize,
run_channels: Option<ControlChannels>,
metrics: Arc<NetworkMetrics>,
shutdown_info: Mutex<Option<BParticipantShutdown>>,
no_channel_error_info: RwLock<(Instant, u64)>,
}
@ -106,6 +114,7 @@ impl BParticipant {
run_channels,
metrics,
no_channel_error_info: RwLock::new((Instant::now(), 0)),
shutdown_info: Mutex::new(None),
},
a2b_steam_open_s,
b2a_stream_opened_r,
@ -125,6 +134,14 @@ impl BParticipant {
let (w2b_frames_s, w2b_frames_r) = mpsc::unbounded::<(Cid, Frame)>();
let (prios, a2p_msg_s, b2p_notify_empty_stream_s) =
PrioManager::new(self.metrics.clone(), self.remote_pid_string.clone());
*self.shutdown_info.lock().await = Some(BParticipantShutdown {
b2b_prios_flushed_r,
mgr_to_shutdown: vec![
shutdown_send_mgr_sender,
shutdown_open_mgr_sender,
shutdown_stream_close_mgr_sender,
],
});
let run_channels = self.run_channels.take().unwrap();
futures::join!(
@ -152,15 +169,7 @@ impl BParticipant {
shutdown_stream_close_mgr_receiver,
b2p_notify_empty_stream_s,
),
self.participant_shutdown_mgr(
run_channels.s2b_shutdown_bparticipant_r,
b2b_prios_flushed_r,
vec!(
shutdown_send_mgr_sender,
shutdown_open_mgr_sender,
shutdown_stream_close_mgr_sender
)
),
self.participant_shutdown_mgr(run_channels.s2b_shutdown_bparticipant_r,),
);
}
@ -248,6 +257,8 @@ impl BParticipant {
"FIXME: the frame is actually drop. which is fine for now as the participant \
will be closed, but not if we do channel-takeover"
);
//TEMP FIX: as we dont have channel takeover yet drop the whole bParticipant
self.close_participant(2).await;
false
} else {
true
@ -380,10 +391,10 @@ impl BParticipant {
}
}
},
Frame::Shutdown => error!(
"Somehow this Shutdown signal got here, i should probably handle it. To not \
crash let me just put this message here"
),
Frame::Shutdown => {
debug!("Shutdown received from remote side");
self.close_participant(2).await;
},
f => unreachable!("never reaches frame!: {:?}", f),
}
}
@ -502,47 +513,11 @@ impl BParticipant {
async fn participant_shutdown_mgr(
&self,
s2b_shutdown_bparticipant_r: oneshot::Receiver<oneshot::Sender<async_std::io::Result<()>>>,
b2b_prios_flushed_r: oneshot::Receiver<()>,
mut to_shutdown: Vec<oneshot::Sender<()>>,
) {
self.running_mgr.fetch_add(1, Ordering::Relaxed);
trace!("start participant_shutdown_mgr");
let sender = s2b_shutdown_bparticipant_r.await.unwrap();
debug!("closing all managers");
for sender in to_shutdown.drain(..) {
if let Err(e) = sender.send(()) {
warn!(?e, "manager seems to be closed already, weird, maybe a bug");
};
}
debug!("closing all streams");
for (sid, si) in self.streams.write().await.drain() {
trace!(?sid, "shutting down Stream");
si.closed.store(true, Ordering::Relaxed);
}
debug!("waiting for prios to be flushed");
b2b_prios_flushed_r.await.unwrap();
debug!("closing all channels");
for ci in self.channels.write().await.drain(..) {
if let Err(e) = ci.b2r_read_shutdown.send(()) {
debug!(?e, ?ci.cid, "seems like this read protocol got already dropped by closing the Stream itself, just ignoring the fact");
};
}
//Wait for other bparticipants mgr to close via AtomicUsize
const SLEEP_TIME: Duration = Duration::from_millis(5);
async_std::task::sleep(SLEEP_TIME).await;
let mut i: u32 = 1;
while self.running_mgr.load(Ordering::Relaxed) > 1 {
i += 1;
if i.rem_euclid(10) == 1 {
trace!(
"waiting for bparticipant mgr to shut down, remaining {}",
self.running_mgr.load(Ordering::Relaxed) - 1
);
}
async_std::task::sleep(SLEEP_TIME * i).await;
}
trace!("all bparticipant mgr (except me) are shut down now");
self.metrics.participants_disconnected_total.inc();
self.close_participant(1).await;
sender.send(Ok(())).unwrap();
trace!("stop participant_shutdown_mgr");
self.running_mgr.fetch_sub(1, Ordering::Relaxed);
@ -638,9 +613,57 @@ impl BParticipant {
)
}
/*
async fn close_participant(&self) {
/// This will gracefully shut down the bparticipant: it stops all manager
/// tasks, marks all streams closed, waits for the prio queues to be
/// flushed, shuts down the channel read protocols, and finally spin-waits
/// until the other managers have actually exited.
///
/// allowed_managers: the number of open managers to sleep on. Must be 1 for
/// shutdown_mgr and 2 if it comes from a send error (that manager is still
/// counted in `running_mgr` while it calls us).
async fn close_participant(&self, allowed_managers: usize) {
trace!("participant shutdown triggered");
// Taking the shutdown info out of the Mutex makes this idempotent: a
// second (concurrent or later) call finds `None` and bails out instead
// of double-closing.
let mut info = match self.shutdown_info.lock().await.take() {
Some(info) => info,
None => {
error!(
"Close of participant seemed to be called twice, that's bad, ignoring the 2nd \
close"
);
return;
},
};
debug!("closing all managers");
// Tell every manager task to stop. A failed send just means the
// manager's receiver was already dropped, i.e. it exited on its own.
for sender in info.mgr_to_shutdown.drain(..) {
if let Err(e) = sender.send(()) {
warn!(?e, "manager seems to be closed already, weird, maybe a bug");
};
}
debug!("closing all streams");
// Flag every stream as closed so the user-facing handles observe the
// shutdown.
for (sid, si) in self.streams.write().await.drain() {
trace!(?sid, "shutting down Stream");
si.closed.store(true, Ordering::Relaxed);
}
debug!("waiting for prios to be flushed");
// Block until the prio manager confirms all pending messages went out.
info.b2b_prios_flushed_r.await.unwrap();
debug!("closing all channels");
// Shut down each channel's read half; it may already be gone if the
// stream close tore it down first, which is fine.
for ci in self.channels.write().await.drain(..) {
if let Err(e) = ci.b2r_read_shutdown.send(()) {
debug!(?e, ?ci.cid, "seems like this read protocol got already dropped by closing the Stream itself, just ignoring the fact");
};
}
//Wait for other bparticipants mgr to close via AtomicUsize
const SLEEP_TIME: Duration = Duration::from_millis(5);
async_std::task::sleep(SLEEP_TIME).await;
let mut i: u32 = 1;
// Poll `running_mgr` with a linearly growing backoff until only the
// `allowed_managers` callers remain; log progress every 10th iteration
// to avoid trace spam.
while self.running_mgr.load(Ordering::Relaxed) > allowed_managers {
i += 1;
if i.rem_euclid(10) == 1 {
trace!(
?allowed_managers,
"waiting for bparticipant mgr to shut down, remaining {}",
self.running_mgr.load(Ordering::Relaxed) - allowed_managers
);
}
async_std::task::sleep(SLEEP_TIME * i).await;
}
trace!("all bparticipant mgr (except me) are shut down now");
self.metrics.participants_disconnected_total.inc();
debug!("bparticipant close done");
}
*/
}