mirror of https://gitlab.com/veloren/veloren.git
Use locks in a more sensible way.

- replace RwLock with Mutex where the lock is only ever taken for insert/delete
- use the RwLock<HashMap<Mutex<T>>> pattern otherwise, so concurrent `.read()` access stays possible
- fixed a deadlock O.o
commit d37ca02913 (parent 877eaee151)
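The commit message names two locking patterns. A minimal sketch of both, with illustrative names that appear nowhere in this diff:

    use std::{collections::HashMap, sync::Arc};
    use async_std::sync::{Mutex, RwLock};

    // Pattern 1: every access inserts or removes an entry, i.e. is
    // exclusive anyway -- a plain Mutex states that directly.
    struct Listeners {
        listeners: Mutex<HashMap<u64, String>>,
    }

    // Pattern 2: entries are looked up concurrently but mutated one at a
    // time -- keep the map behind a RwLock so lookups can share `.read()`,
    // and give each entry its own Mutex.
    struct Channels {
        channels: Arc<RwLock<HashMap<u64, Mutex<String>>>>,
    }

    impl Channels {
        async fn touch(&self, cid: u64) {
            // shared lock on the whole map, exclusive lock on one entry
            if let Some(entry) = self.channels.read().await.get(&cid) {
                entry.lock().await.push('!');
            }
        }
    }

With pattern 2, two tasks working on different entries only contend on the brief `.read()` of the map, not on each other's data.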
@@ -8,11 +8,7 @@ use crate::{
     scheduler::Scheduler,
     types::{Mid, Pid, Prio, Promises, Sid},
 };
-use async_std::{
-    io,
-    sync::{Mutex, RwLock},
-    task,
-};
+use async_std::{io, sync::Mutex, task};
 use futures::{
     channel::{mpsc, oneshot},
     sink::SinkExt,
@@ -52,8 +48,8 @@ pub enum ProtocolAddr {
 pub struct Participant {
     local_pid: Pid,
     remote_pid: Pid,
-    a2b_stream_open_s: RwLock<mpsc::UnboundedSender<A2bStreamOpen>>,
-    b2a_stream_opened_r: RwLock<mpsc::UnboundedReceiver<Stream>>,
+    a2b_stream_open_s: Mutex<mpsc::UnboundedSender<A2bStreamOpen>>,
+    b2a_stream_opened_r: Mutex<mpsc::UnboundedReceiver<Stream>>,
     a2s_disconnect_s: A2sDisconnect,
 }
 
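Both fields are channel endpoints, and `SinkExt::send` / `StreamExt::next` take `&mut self`, so the old RwLock could only ever be acquired with `.write()`; a Mutex expresses the actual access pattern. A sketch of the same shape, using a hypothetical `Endpoint` type rather than the real `Participant`:

    use async_std::sync::Mutex;
    use futures::{channel::mpsc, SinkExt, StreamExt};

    struct Endpoint {
        tx: Mutex<mpsc::UnboundedSender<u32>>,
        rx: Mutex<mpsc::UnboundedReceiver<u32>>,
    }

    impl Endpoint {
        async fn send(&self, v: u32) {
            // `send` needs `&mut`, so a shared `.read()` could never help here
            let _ = self.tx.lock().await.send(v).await;
        }

        async fn recv(&self) -> Option<u32> {
            // likewise `next`
            self.rx.lock().await.next().await
        }
    }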
@@ -147,12 +143,12 @@ pub enum StreamError {
 /// [`connected`]: Network::connected
 pub struct Network {
     local_pid: Pid,
-    participant_disconnect_sender: RwLock<HashMap<Pid, A2sDisconnect>>,
+    participant_disconnect_sender: Mutex<HashMap<Pid, A2sDisconnect>>,
     listen_sender:
-        RwLock<mpsc::UnboundedSender<(ProtocolAddr, oneshot::Sender<async_std::io::Result<()>>)>>,
+        Mutex<mpsc::UnboundedSender<(ProtocolAddr, oneshot::Sender<async_std::io::Result<()>>)>>,
     connect_sender:
-        RwLock<mpsc::UnboundedSender<(ProtocolAddr, oneshot::Sender<io::Result<Participant>>)>>,
-    connected_receiver: RwLock<mpsc::UnboundedReceiver<Participant>>,
+        Mutex<mpsc::UnboundedSender<(ProtocolAddr, oneshot::Sender<io::Result<Participant>>)>>,
+    connected_receiver: Mutex<mpsc::UnboundedReceiver<Participant>>,
     shutdown_sender: Option<oneshot::Sender<()>>,
 }
 
@@ -249,10 +245,10 @@ impl Network {
         (
             Self {
                 local_pid: participant_id,
-                participant_disconnect_sender: RwLock::new(HashMap::new()),
-                listen_sender: RwLock::new(listen_sender),
-                connect_sender: RwLock::new(connect_sender),
-                connected_receiver: RwLock::new(connected_receiver),
+                participant_disconnect_sender: Mutex::new(HashMap::new()),
+                listen_sender: Mutex::new(listen_sender),
+                connect_sender: Mutex::new(connect_sender),
+                connected_receiver: Mutex::new(connected_receiver),
                 shutdown_sender: Some(shutdown_sender),
             },
             move || {
@@ -300,7 +296,7 @@ impl Network {
         let (s2a_result_s, s2a_result_r) = oneshot::channel::<async_std::io::Result<()>>();
         debug!(?address, "listening on address");
         self.listen_sender
-            .write()
+            .lock()
             .await
             .send((address, s2a_result_s))
             .await?;
@@ -356,7 +352,7 @@ impl Network {
         let (pid_sender, pid_receiver) = oneshot::channel::<io::Result<Participant>>();
         debug!(?address, "Connect to address");
         self.connect_sender
-            .write()
+            .lock()
             .await
             .send((address, pid_sender))
             .await?;
@@ -370,7 +366,7 @@ impl Network {
             "Received Participant id from remote and return to user"
         );
         self.participant_disconnect_sender
-            .write()
+            .lock()
             .await
             .insert(pid, participant.a2s_disconnect_s.clone());
         Ok(participant)
@@ -410,9 +406,9 @@ impl Network {
     /// [`Streams`]: crate::api::Stream
     /// [`listen`]: crate::api::Network::listen
     pub async fn connected(&self) -> Result<Participant, NetworkError> {
-        let participant = self.connected_receiver.write().await.next().await?;
+        let participant = self.connected_receiver.lock().await.next().await?;
         self.participant_disconnect_sender
-            .write()
+            .lock()
             .await
             .insert(participant.remote_pid, participant.a2s_disconnect_s.clone());
         Ok(participant)
@@ -430,8 +426,8 @@ impl Participant {
         Self {
             local_pid,
             remote_pid,
-            a2b_stream_open_s: RwLock::new(a2b_stream_open_s),
-            b2a_stream_opened_r: RwLock::new(b2a_stream_opened_r),
+            a2b_stream_open_s: Mutex::new(a2b_stream_open_s),
+            b2a_stream_opened_r: Mutex::new(b2a_stream_opened_r),
             a2s_disconnect_s: Arc::new(Mutex::new(Some(a2s_disconnect_s))),
         }
     }
@@ -477,11 +473,11 @@ impl Participant {
     ///
     /// [`Streams`]: crate::api::Stream
     pub async fn open(&self, prio: u8, promises: Promises) -> Result<Stream, ParticipantError> {
-        //use this lock for now to make sure that only one open at a time is made,
-        // TODO: not sure if we can paralise that, check in future
-        let mut a2b_stream_open_s = self.a2b_stream_open_s.write().await;
         let (p2a_return_stream_s, p2a_return_stream_r) = oneshot::channel();
-        if let Err(e) = a2b_stream_open_s
+        if let Err(e) = self
+            .a2b_stream_open_s
+            .lock()
+            .await
             .send((prio, promises, p2a_return_stream_s))
             .await
         {
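Note how the rewrite also shortens the critical section: the temporary guard returned by `.lock().await` lives only to the end of the statement. A reduced sketch of that lifetime, with placeholder types:

    use async_std::sync::Mutex;
    use futures::{channel::mpsc, SinkExt};

    async fn open_sketch(s: &Mutex<mpsc::UnboundedSender<u8>>) {
        // the guard from `lock().await` is held across `send(...).await`
        // and dropped at the `;`
        let _ = s.lock().await.send(1).await;
        // a later await (e.g. on the oneshot reply) runs unlocked
    }

The old version bound the guard to a named variable, which kept the sender locked for the rest of the function.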
@@ -535,10 +531,7 @@ impl Participant {
     /// [`connected`]: Network::connected
     /// [`open`]: Participant::open
     pub async fn opened(&self) -> Result<Stream, ParticipantError> {
-        //use this lock for now to make sure that only one open at a time is made,
-        // TODO: not sure if we can paralise that, check in future
-        let mut stream_opened_receiver = self.b2a_stream_opened_r.write().await;
-        match stream_opened_receiver.next().await {
+        match self.b2a_stream_opened_r.lock().await.next().await {
             Some(stream) => {
                 let sid = stream.sid;
                 debug!(?sid, ?self.remote_pid, "Receive opened stream");
@@ -861,7 +854,7 @@ impl Drop for Network {
             // we MUST avoid nested block_on, good that Network::Drop no longer triggers
             // Participant::Drop directly but just the BParticipant
             for (remote_pid, a2s_disconnect_s) in
-                self.participant_disconnect_sender.write().await.drain()
+                self.participant_disconnect_sender.lock().await.drain()
             {
                 match a2s_disconnect_s.lock().await.take() {
                     Some(mut a2s_disconnect_s) => {
@@ -8,7 +8,7 @@ use crate::{
     protocols::Protocols,
     types::{Cid, Frame, Pid, Prio, Promises, Sid},
 };
-use async_std::sync::RwLock;
+use async_std::sync::{Mutex, RwLock};
 use futures::{
     channel::{mpsc, oneshot},
     future::FutureExt,
@@ -46,7 +46,7 @@ struct StreamInfo {
     prio: Prio,
     promises: Promises,
     send_closed: Arc<AtomicBool>,
-    b2a_msg_recv_s: mpsc::UnboundedSender<IncomingMessage>,
+    b2a_msg_recv_s: Mutex<mpsc::UnboundedSender<IncomingMessage>>,
 }
 
 #[derive(Debug)]
@@ -71,7 +71,7 @@ pub struct BParticipant {
     remote_pid: Pid,
     remote_pid_string: String, //optimisation
     offset_sid: Sid,
-    channels: Arc<RwLock<HashMap<Cid, ChannelInfo>>>,
+    channels: Arc<RwLock<HashMap<Cid, Mutex<ChannelInfo>>>>,
     streams: RwLock<HashMap<Sid, StreamInfo>>,
     running_mgr: AtomicUsize,
     run_channels: Option<ControlChannels>,
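`channels` is the one map that keeps its RwLock, now with each `ChannelInfo` behind its own Mutex: pattern 2 from the commit message. Roughly how the two access paths coexist, with a simplified stand-in for `ChannelInfo`:

    use std::{collections::HashMap, sync::Arc};
    use async_std::sync::{Mutex, RwLock};

    struct ChannelInfoLite {
        frames_sent: u64, // stand-in for the real ChannelInfo fields
    }

    struct Shared {
        channels: Arc<RwLock<HashMap<u64, Mutex<ChannelInfoLite>>>>,
    }

    impl Shared {
        // hot path: concurrent senders share `.read()` on the map and
        // only contend on the per-channel Mutex
        async fn send_on(&self, cid: u64) {
            if let Some(ci) = self.channels.read().await.get(&cid) {
                ci.lock().await.frames_sent += 1;
            }
        }

        // cold path: adding or removing a channel still takes `.write()`
        async fn add(&self, cid: u64) {
            self.channels
                .write()
                .await
                .insert(cid, Mutex::new(ChannelInfoLite { frames_sent: 0 }));
        }
    }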
@@ -252,10 +252,10 @@ impl BParticipant {
         frame: Frame,
         #[cfg(feature = "metrics")] frames_out_total_cache: &mut PidCidFrameCache,
     ) -> bool {
-        // find out ideal channel here
-        //TODO: just take first
-        let mut lock = self.channels.write().await;
-        if let Some(ci) = lock.values_mut().next() {
+        let mut drop_cid = None;
+        // TODO: find out ideal channel here
+        let res = if let Some(ci) = self.channels.read().await.values().next() {
+            let mut ci = ci.lock().await;
             //we are increasing metrics without checking the result to please
             // borrow_checker. otherwise we would need to close `frame` what we
             // dont want!
@@ -266,20 +266,7 @@ impl BParticipant {
             if let Err(e) = ci.b2w_frame_s.send(frame).await {
                 let cid = ci.cid;
                 info!(?e, ?cid, "channel no longer available");
-                if let Some(ci) = self.channels.write().await.remove(&cid) {
-                    trace!(?cid, "stopping read protocol");
-                    if let Err(e) = ci.b2r_read_shutdown.send(()) {
-                        trace!(?cid, ?e, "seems like was already shut down");
-                    }
-                }
-                //TODO FIXME tags: takeover channel multiple
-                info!(
-                    "FIXME: the frame is actually drop. which is fine for now as the participant \
-                     will be closed, but not if we do channel-takeover"
-                );
-                //TEMP FIX: as we dont have channel takeover yet drop the whole bParticipant
-                self.close_write_api(Some(ParticipantError::ProtocolFailedUnrecoverable))
-                    .await;
+                drop_cid = Some(cid);
                 false
             } else {
                 true
@@ -301,7 +288,25 @@ impl BParticipant {
                 guard.1 += 1;
             }
             false
-        }
+        };
+        if let Some(cid) = drop_cid {
+            if let Some(ci) = self.channels.write().await.remove(&cid) {
+                let ci = ci.into_inner();
+                trace!(?cid, "stopping read protocol");
+                if let Err(e) = ci.b2r_read_shutdown.send(()) {
+                    trace!(?cid, ?e, "seems like was already shut down");
+                }
+            }
+            //TODO FIXME tags: takeover channel multiple
+            info!(
+                "FIXME: the frame is actually drop. which is fine for now as the participant will \
+                 be closed, but not if we do channel-takeover"
+            );
+            //TEMP FIX: as we dont have channel takeover yet drop the whole bParticipant
+            self.close_write_api(Some(ParticipantError::ProtocolFailedUnrecoverable))
+                .await;
+        };
+        res
     }
 
     async fn handle_frames_mgr(
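This restructuring is presumably the "fixed a deadlock" from the commit message: the old `send_frame` held `self.channels.write()` for its whole body and then requested `self.channels.write()` a second time in the error branch to `remove(&cid)`. These locks are not reentrant, so the second acquisition waits on the first forever. The new code merely records `drop_cid` while the (now shared) lock is held and performs the removal after the guard is gone. The hazard and the fix, reduced to a sketch:

    use async_std::sync::RwLock;
    use std::collections::HashMap;

    // BROKEN: a second `.write()` on the same task while `guard` is
    // still alive never completes
    async fn deadlocks(map: &RwLock<HashMap<u64, String>>) {
        let guard = map.write().await;
        if guard.contains_key(&1) {
            map.write().await.remove(&1); // deadlock
        }
    }

    // FIXED: remember the key, drop the guard, then mutate
    async fn fixed(map: &RwLock<HashMap<u64, String>>) {
        let mut drop_key = None;
        {
            let guard = map.read().await;
            if guard.contains_key(&1) {
                drop_key = Some(1);
            }
        } // guard released here
        if let Some(k) = drop_key {
            map.write().await.remove(&k);
        }
    }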
@@ -325,7 +330,8 @@ impl BParticipant {
                 Err(()) => {
                     // The read protocol stopped, i need to make sure that write gets stopped
                     debug!("read protocol was closed. Stopping write protocol");
-                    if let Some(ci) = self.channels.write().await.get_mut(&cid) {
+                    if let Some(ci) = self.channels.read().await.get(&cid) {
+                        let mut ci = ci.lock().await;
                         ci.b2w_frame_s
                             .close()
                             .await
@@ -381,7 +387,7 @@ impl BParticipant {
                 .with_label_values(&[&self.remote_pid_string])
                 .inc();
             si.send_closed.store(true, Ordering::Relaxed);
-            si.b2a_msg_recv_s.close_channel();
+            si.b2a_msg_recv_s.into_inner().close_channel();
             trace!(?sid, "Closed stream from remote");
         } else {
             warn!(
@@ -414,8 +420,8 @@ impl BParticipant {
             if finished {
                 //trace!(?mid, "finished receiving message");
                 let imsg = messages.remove(&mid).unwrap();
-                if let Some(si) = self.streams.write().await.get_mut(&imsg.sid) {
-                    if let Err(e) = si.b2a_msg_recv_s.send(imsg).await {
+                if let Some(si) = self.streams.read().await.get(&imsg.sid) {
+                    if let Err(e) = si.b2a_msg_recv_s.lock().await.send(imsg).await {
                         warn!(
                             ?e,
                             ?mid,
@@ -482,12 +488,15 @@ impl BParticipant {
         let channels = self.channels.clone();
         async move {
             let (channel, b2w_frame_s, b2r_read_shutdown) = Channel::new(cid);
-            channels.write().await.insert(cid, ChannelInfo {
-                cid,
-                cid_string: cid.to_string(),
-                b2w_frame_s,
-                b2r_read_shutdown,
-            });
+            channels.write().await.insert(
+                cid,
+                Mutex::new(ChannelInfo {
+                    cid,
+                    cid_string: cid.to_string(),
+                    b2w_frame_s,
+                    b2r_read_shutdown,
+                }),
+            );
             b2s_create_channel_done_s.send(()).unwrap();
             #[cfg(feature = "metrics")]
             self.metrics
@@ -619,6 +628,7 @@ impl BParticipant {
 
         debug!("Closing all channels, after flushed prios");
         for (cid, ci) in self.channels.write().await.drain() {
+            let ci = ci.into_inner();
             if let Err(e) = ci.b2r_read_shutdown.send(()) {
                 debug!(
                     ?e,
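`into_inner()` here consumes the Mutex and hands back the `ChannelInfo` without locking, which is sound because `drain()` (and `remove()` in the send path above) just moved the entry out of the map, so this task owns it outright. The idiom in isolation:

    use async_std::sync::{Mutex, RwLock};
    use std::collections::HashMap;

    async fn close_all(channels: &RwLock<HashMap<u64, Mutex<String>>>) {
        for (cid, ci) in channels.write().await.drain() {
            // `ci` is owned now, so no `.lock().await` is needed
            let ci = ci.into_inner();
            println!("closing channel {}: {}", cid, ci);
        }
    }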
@@ -695,7 +705,7 @@ impl BParticipant {
         match self.streams.read().await.get(&sid) {
             Some(si) => {
                 si.send_closed.store(true, Ordering::Relaxed);
-                si.b2a_msg_recv_s.close_channel();
+                si.b2a_msg_recv_s.lock().await.close_channel();
             },
             None => warn!("Couldn't find the stream, might be simultaneous close from remote"),
         }
@@ -742,7 +752,7 @@ impl BParticipant {
             prio,
             promises,
             send_closed: send_closed.clone(),
-            b2a_msg_recv_s,
+            b2a_msg_recv_s: Mutex::new(b2a_msg_recv_s),
         });
         #[cfg(feature = "metrics")]
         self.metrics
@@ -770,7 +780,7 @@ impl BParticipant {
         lock.b2a_stream_opened_s.close_channel();
 
         debug!("Closing all streams for write");
-        for (sid, si) in self.streams.write().await.iter() {
+        for (sid, si) in self.streams.read().await.iter() {
             trace!(?sid, "Shutting down Stream for write");
             si.send_closed.store(true, Ordering::Relaxed);
         }
@@ -783,7 +793,7 @@ impl BParticipant {
         debug!("Closing all streams");
         for (sid, si) in self.streams.write().await.drain() {
             trace!(?sid, "Shutting down Stream");
-            si.b2a_msg_recv_s.close_channel();
+            si.b2a_msg_recv_s.lock().await.close_channel();
         }
     }
 }
@@ -7,10 +7,7 @@ use crate::{
     protocols::{Protocols, TcpProtocol, UdpProtocol},
     types::Pid,
 };
-use async_std::{
-    io, net,
-    sync::{Mutex, RwLock},
-};
+use async_std::{io, net, sync::Mutex};
 use futures::{
     channel::{mpsc, oneshot},
     executor::ThreadPool,
@@ -76,9 +73,9 @@ pub struct Scheduler {
     pool: Arc<ThreadPool>,
     run_channels: Option<ControlChannels>,
     participant_channels: Arc<Mutex<Option<ParticipantChannels>>>,
-    participants: Arc<RwLock<HashMap<Pid, ParticipantInfo>>>,
+    participants: Arc<Mutex<HashMap<Pid, ParticipantInfo>>>,
     channel_ids: Arc<AtomicU64>,
-    channel_listener: RwLock<HashMap<ProtocolAddr, oneshot::Sender<()>>>,
+    channel_listener: Mutex<HashMap<ProtocolAddr, oneshot::Sender<()>>>,
     #[cfg(feature = "metrics")]
     metrics: Arc<NetworkMetrics>,
 }
@@ -136,9 +133,9 @@ impl Scheduler {
                 pool: Arc::new(ThreadPool::new().unwrap()),
                 run_channels,
                 participant_channels: Arc::new(Mutex::new(Some(participant_channels))),
-                participants: Arc::new(RwLock::new(HashMap::new())),
+                participants: Arc::new(Mutex::new(HashMap::new())),
                 channel_ids: Arc::new(AtomicU64::new(0)),
-                channel_listener: RwLock::new(HashMap::new()),
+                channel_listener: Mutex::new(HashMap::new()),
                 #[cfg(feature = "metrics")]
                 metrics,
             },
@@ -180,7 +177,7 @@ impl Scheduler {
             .inc();
         let (end_sender, end_receiver) = oneshot::channel::<()>();
         self.channel_listener
-            .write()
+            .lock()
             .await
             .insert(address.clone(), end_sender);
         self.channel_creator(address, end_receiver, s2a_listen_result_s)
@@ -273,7 +270,7 @@ impl Scheduler {
         // 3. Participant will try to access the BParticipant senders and receivers with
         //    their next api action, it will fail and be closed then.
         trace!(?pid, "Got request to close participant");
-        if let Some(mut pi) = self.participants.write().await.remove(&pid) {
+        if let Some(mut pi) = self.participants.lock().await.remove(&pid) {
             let (finished_sender, finished_receiver) = oneshot::channel();
             pi.s2b_shutdown_bparticipant_s
                 .take()
@@ -310,7 +307,7 @@ impl Scheduler {
         a2s_scheduler_shutdown_r.await.unwrap();
         self.closed.store(true, Ordering::Relaxed);
         debug!("Shutting down all BParticipants gracefully");
-        let mut participants = self.participants.write().await;
+        let mut participants = self.participants.lock().await;
         let waitings = participants
             .drain()
             .map(|(pid, mut pi)| {
@@ -336,7 +333,7 @@ impl Scheduler {
             };
         }
         debug!("shutting down protocol listeners");
-        for (addr, end_channel_sender) in self.channel_listener.write().await.drain() {
+        for (addr, end_channel_sender) in self.channel_listener.lock().await.drain() {
             trace!(?addr, "stopping listen on protocol");
             if let Err(e) = end_channel_sender.send(()) {
                 warn!(?addr, ?e, "listener crashed/disconnected already");
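A side note on why these shutdown paths `drain()` instead of iterating: `oneshot::Sender::send` consumes the sender by value, so each entry has to be moved out of the map. A sketch of the same idiom:

    use async_std::sync::Mutex;
    use futures::channel::oneshot;
    use std::collections::HashMap;

    async fn stop_all(listeners: &Mutex<HashMap<String, oneshot::Sender<()>>>) {
        for (addr, stop) in listeners.lock().await.drain() {
            // `send` takes `self`, which is why the entry was drained out
            if stop.send(()).is_err() {
                eprintln!("listener {} already gone", addr);
            }
        }
    }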
@@ -531,7 +528,7 @@ impl Scheduler {
                         ?pid,
                         "Detected that my channel is ready!, activating it :)"
                     );
-                    let mut participants = participants.write().await;
+                    let mut participants = participants.lock().await;
                     if !participants.contains_key(&pid) {
                         debug!(?cid, "New participant connected via a channel");
                         let (