mirror of
https://gitlab.com/veloren/veloren.git
synced 2024-08-30 18:12:32 +00:00
Merge branch 'xMAC94x/server-stability' into 'master'
track state tick in a historgram too See merge request veloren/veloren!1998
This commit is contained in:
commit
4279720947
@ -129,7 +129,7 @@ impl Message {
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn verify(&self, params: StreamParams) {
|
||||
#[cfg(not(feature = "compression"))]
|
||||
let _stream = stream;
|
||||
let _params = params;
|
||||
#[cfg(feature = "compression")]
|
||||
if self.compressed != params.promises.contains(Promises::COMPRESSED) {
|
||||
warn!(
|
||||
|
@ -9,6 +9,8 @@ use std::error::Error;
|
||||
pub struct NetworkMetrics {
|
||||
pub listen_requests_total: IntCounterVec,
|
||||
pub connect_requests_total: IntCounterVec,
|
||||
pub incoming_connections_total: IntCounterVec,
|
||||
pub failed_handshakes_total: IntCounter,
|
||||
pub participants_connected_total: IntCounter,
|
||||
pub participants_disconnected_total: IntCounter,
|
||||
// channel id's, seperated by PARTICIPANT, max 5
|
||||
@ -44,6 +46,17 @@ impl NetworkMetrics {
|
||||
),
|
||||
&["protocol"],
|
||||
)?;
|
||||
let incoming_connections_total = IntCounterVec::new(
|
||||
Opts::new(
|
||||
"incoming_connections_total",
|
||||
"Shows the number of external requests to the scheduler",
|
||||
),
|
||||
&["protocol"],
|
||||
)?;
|
||||
let failed_handshakes_total = IntCounter::with_opts(Opts::new(
|
||||
"failed_handshakes_total",
|
||||
"Shows the number of failed handshakes",
|
||||
))?;
|
||||
let participants_connected_total = IntCounter::with_opts(Opts::new(
|
||||
"participants_connected_total",
|
||||
"Shows the number of participants connected to the network",
|
||||
@ -110,6 +123,8 @@ impl NetworkMetrics {
|
||||
Ok(Self {
|
||||
listen_requests_total,
|
||||
connect_requests_total,
|
||||
incoming_connections_total,
|
||||
failed_handshakes_total,
|
||||
participants_connected_total,
|
||||
participants_disconnected_total,
|
||||
participants_channel_ids,
|
||||
@ -125,6 +140,8 @@ impl NetworkMetrics {
|
||||
pub fn register(&self, registry: &Registry) -> Result<(), Box<dyn Error>> {
|
||||
registry.register(Box::new(self.listen_requests_total.clone()))?;
|
||||
registry.register(Box::new(self.connect_requests_total.clone()))?;
|
||||
registry.register(Box::new(self.incoming_connections_total.clone()))?;
|
||||
registry.register(Box::new(self.failed_handshakes_total.clone()))?;
|
||||
registry.register(Box::new(self.participants_connected_total.clone()))?;
|
||||
registry.register(Box::new(self.participants_disconnected_total.clone()))?;
|
||||
registry.register(Box::new(self.participants_channel_ids.clone()))?;
|
||||
@ -137,6 +154,11 @@ impl NetworkMetrics {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn connect_requests_cache(&self, protocol: &ProtocolAddr) -> prometheus::IntCounter {
|
||||
self.incoming_connections_total
|
||||
.with_label_values(&[protocol_name(protocol)])
|
||||
}
|
||||
|
||||
pub(crate) fn channels_connected(&self, remote_p: &str, no: usize, cid: Cid) {
|
||||
self.channels_connected_total
|
||||
.with_label_values(&[remote_p])
|
||||
|
@ -384,6 +384,8 @@ impl Scheduler {
|
||||
s2a_listen_result_s: oneshot::Sender<io::Result<()>>,
|
||||
) {
|
||||
trace!(?addr, "Start up channel creator");
|
||||
#[cfg(feature = "metrics")]
|
||||
let mcache = self.metrics.connect_requests_cache(&addr);
|
||||
match addr {
|
||||
ProtocolAddr::Tcp(addr) => {
|
||||
let listener = match net::TcpListener::bind(addr).await {
|
||||
@ -395,7 +397,7 @@ impl Scheduler {
|
||||
info!(
|
||||
?addr,
|
||||
?e,
|
||||
"Tcp bind error durin listener startup"
|
||||
"Tcp bind error during listener startup"
|
||||
);
|
||||
s2a_listen_result_s.send(Err(e)).unwrap();
|
||||
return;
|
||||
@ -414,8 +416,10 @@ impl Scheduler {
|
||||
continue;
|
||||
},
|
||||
};
|
||||
info!("Accepting Tcp from: {}", remote_addr);
|
||||
#[cfg(feature = "metrics")]
|
||||
mcache.inc();
|
||||
let cid = self.channel_ids.fetch_add(1, Ordering::Relaxed);
|
||||
info!(?remote_addr, ?cid, "Accepting Tcp from");
|
||||
self.init_protocol(Protocols::new_tcp(stream, cid, Arc::clone(&self.protocol_metrics)), cid, None, true)
|
||||
.await;
|
||||
}
|
||||
@ -433,8 +437,10 @@ impl Scheduler {
|
||||
} {
|
||||
let (remote_to_local_s, remote_to_local_r) = mpsc::channel(Self::MPSC_CHANNEL_BOUND);
|
||||
local_remote_to_local_s.send(remote_to_local_s).unwrap();
|
||||
info!(?addr, "Accepting Mpsc from");
|
||||
#[cfg(feature = "metrics")]
|
||||
mcache.inc();
|
||||
let cid = self.channel_ids.fetch_add(1, Ordering::Relaxed);
|
||||
info!(?addr, ?cid, "Accepting Mpsc from");
|
||||
self.init_protocol(Protocols::new_mpsc(local_to_remote_s, remote_to_local_r, cid, Arc::clone(&self.protocol_metrics)), cid, None, true)
|
||||
.await;
|
||||
}
|
||||
@ -650,6 +656,8 @@ impl Scheduler {
|
||||
},
|
||||
Err(e) => {
|
||||
debug!(?cid, ?e, "Handshake from a new connection failed");
|
||||
#[cfg(feature = "metrics")]
|
||||
metrics.failed_handshakes_total.inc();
|
||||
if let Some(pid_oneshot) = s2a_return_pid_s {
|
||||
// someone is waiting with `connect`, so give them their Error
|
||||
trace!(?cid, "returning the Err to api who requested the connect");
|
||||
|
@ -704,14 +704,11 @@ impl Server {
|
||||
.set((before_persistence_updates - before_entity_cleanup).as_nanos() as i64);
|
||||
tt.with_label_values(&["persistence_updates"])
|
||||
.set((end_of_server_tick - before_persistence_updates).as_nanos() as i64);
|
||||
}
|
||||
|
||||
#[cfg(feature = "tracy")]
|
||||
{
|
||||
use common_base::tracy_client::{create_plot, Plot};
|
||||
let entity_count = self.state.ecs().entities().join().count();
|
||||
static ENTITY_COUNT: Plot = create_plot!("entity count");
|
||||
ENTITY_COUNT.point(entity_count as f64);
|
||||
tick_metrics.tick_time_hist.observe(
|
||||
end_of_server_tick
|
||||
.duration_since(before_state_tick)
|
||||
.as_secs_f64(),
|
||||
);
|
||||
}
|
||||
|
||||
// 9) Finish the tick, pass control back to the frontend.
|
||||
|
@ -1,6 +1,6 @@
|
||||
use prometheus::{
|
||||
Gauge, GaugeVec, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
|
||||
Opts, Registry,
|
||||
Gauge, GaugeVec, Histogram, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge,
|
||||
IntGaugeVec, Opts, Registry,
|
||||
};
|
||||
use std::{
|
||||
convert::TryInto,
|
||||
@ -48,6 +48,7 @@ pub struct TickMetrics {
|
||||
pub chunk_groups_count: IntGauge,
|
||||
pub entity_count: IntGauge,
|
||||
pub tick_time: IntGaugeVec,
|
||||
pub tick_time_hist: Histogram,
|
||||
pub build_info: IntGauge,
|
||||
pub start_time: IntGauge,
|
||||
pub time_of_day: Gauge,
|
||||
@ -262,6 +263,28 @@ impl TickMetrics {
|
||||
Opts::new("tick_time", "time in ns required for a tick of the server"),
|
||||
&["period"],
|
||||
)?;
|
||||
// 33.33ms is the ideal tick time. So we have hight detail around it.
|
||||
// 300/700 are to detect high I/O blocks
|
||||
let bucket = vec![
|
||||
Duration::from_millis(8).as_secs_f64(),
|
||||
Duration::from_millis(16).as_secs_f64(),
|
||||
Duration::from_millis(24).as_secs_f64(),
|
||||
Duration::from_millis(30).as_secs_f64(),
|
||||
Duration::from_millis(33).as_secs_f64(),
|
||||
Duration::from_millis(37).as_secs_f64(),
|
||||
Duration::from_millis(45).as_secs_f64(),
|
||||
Duration::from_millis(60).as_secs_f64(),
|
||||
Duration::from_millis(100).as_secs_f64(),
|
||||
Duration::from_millis(300).as_secs_f64(),
|
||||
Duration::from_millis(700).as_secs_f64(),
|
||||
];
|
||||
let tick_time_hist = Histogram::with_opts(
|
||||
HistogramOpts::new(
|
||||
"tick_time_hist",
|
||||
"shows the detailed time in ns spend for the whole tick as histogram",
|
||||
)
|
||||
.buckets(bucket),
|
||||
)?;
|
||||
|
||||
let since_the_epoch = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
@ -277,6 +300,7 @@ impl TickMetrics {
|
||||
registry.register(Box::new(time_of_day.clone()))?;
|
||||
registry.register(Box::new(light_count.clone()))?;
|
||||
registry.register(Box::new(tick_time.clone()))?;
|
||||
registry.register(Box::new(tick_time_hist.clone()))?;
|
||||
|
||||
Ok(Self {
|
||||
chonks_count,
|
||||
@ -284,6 +308,7 @@ impl TickMetrics {
|
||||
chunk_groups_count,
|
||||
entity_count,
|
||||
tick_time,
|
||||
tick_time_hist,
|
||||
build_info,
|
||||
start_time,
|
||||
time_of_day,
|
||||
|
@ -102,6 +102,7 @@ impl<'a> System<'a> for Sys {
|
||||
if let Some(entities) = entities {
|
||||
let entity_count = entities.join().count();
|
||||
export_tick.entity_count.set(entity_count as i64);
|
||||
common_base::plot!("entity count", entity_count as f64);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user