mirror of
https://gitlab.com/veloren/veloren.git
synced 2024-08-30 18:12:32 +00:00
Better Metrics for Server
- Player online is now seperated into players connected and disconnected and is event driven - Metrics for ChunkGeneration: this is the server side for tracking actuall generation - Metrics for Chunk Network Requests
This commit is contained in:
parent
00598e6d4b
commit
184d405f10
@ -1,3 +1,4 @@
|
||||
use crate::metrics::ChunkGenMetrics;
|
||||
#[cfg(not(feature = "worldgen"))]
|
||||
use crate::test_world::{IndexOwned, World};
|
||||
use common::{generation::ChunkSupplement, terrain::TerrainChunk};
|
||||
@ -21,15 +22,17 @@ pub struct ChunkGenerator {
|
||||
chunk_tx: channel::Sender<ChunkGenResult>,
|
||||
chunk_rx: channel::Receiver<ChunkGenResult>,
|
||||
pending_chunks: HashMap<Vec2<i32>, Arc<AtomicBool>>,
|
||||
metrics: Arc<ChunkGenMetrics>,
|
||||
}
|
||||
impl ChunkGenerator {
|
||||
#[allow(clippy::new_without_default)] // TODO: Pending review in #587
|
||||
pub fn new() -> Self {
|
||||
pub fn new(metrics: ChunkGenMetrics) -> Self {
|
||||
let (chunk_tx, chunk_rx) = channel::unbounded();
|
||||
Self {
|
||||
chunk_tx,
|
||||
chunk_rx,
|
||||
pending_chunks: HashMap::new(),
|
||||
metrics: Arc::new(metrics),
|
||||
}
|
||||
}
|
||||
|
||||
@ -49,6 +52,7 @@ impl ChunkGenerator {
|
||||
let cancel = Arc::new(AtomicBool::new(false));
|
||||
v.insert(Arc::clone(&cancel));
|
||||
let chunk_tx = self.chunk_tx.clone();
|
||||
self.metrics.chunks_requested.inc();
|
||||
thread_pool.execute(move || {
|
||||
let index = index.as_index_ref();
|
||||
let payload = world
|
||||
@ -61,6 +65,7 @@ impl ChunkGenerator {
|
||||
pub fn recv_new_chunk(&mut self) -> Option<ChunkGenResult> {
|
||||
if let Ok((key, res)) = self.chunk_rx.try_recv() {
|
||||
self.pending_chunks.remove(&key);
|
||||
self.metrics.chunks_served.inc();
|
||||
// TODO: do anything else if res is an Err?
|
||||
Some((key, res))
|
||||
} else {
|
||||
@ -75,12 +80,15 @@ impl ChunkGenerator {
|
||||
pub fn cancel_if_pending(&mut self, key: Vec2<i32>) {
|
||||
if let Some(cancel) = self.pending_chunks.remove(&key) {
|
||||
cancel.store(true, Ordering::Relaxed);
|
||||
self.metrics.chunks_canceled.inc();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cancel_all(&mut self) {
|
||||
let metrics = self.metrics.clone();
|
||||
self.pending_chunks.drain().for_each(|(_, cancel)| {
|
||||
cancel.store(true, Ordering::Relaxed);
|
||||
metrics.chunks_canceled.inc();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -97,6 +97,11 @@ impl Server {
|
||||
#[allow(clippy::expect_fun_call)] // TODO: Pending review in #587
|
||||
#[allow(clippy::needless_update)] // TODO: Pending review in #587
|
||||
pub fn new(settings: ServerSettings) -> Result<Self, Error> {
|
||||
let (chunk_gen_metrics, registry_chunk) = metrics::ChunkGenMetrics::new().unwrap();
|
||||
let (network_request_metrics, registry_network) =
|
||||
metrics::NetworkRequestMetrics::new().unwrap();
|
||||
let (player_metrics, registry_player) = metrics::PlayerMetrics::new().unwrap();
|
||||
|
||||
let mut state = State::default();
|
||||
state.ecs_mut().insert(settings.clone());
|
||||
state.ecs_mut().insert(EventBus::<ServerEvent>::default());
|
||||
@ -104,7 +109,11 @@ impl Server {
|
||||
.ecs_mut()
|
||||
.insert(LoginProvider::new(settings.auth_server_address.clone()));
|
||||
state.ecs_mut().insert(Tick(0));
|
||||
state.ecs_mut().insert(ChunkGenerator::new());
|
||||
state.ecs_mut().insert(network_request_metrics);
|
||||
state.ecs_mut().insert(player_metrics);
|
||||
state
|
||||
.ecs_mut()
|
||||
.insert(ChunkGenerator::new(chunk_gen_metrics));
|
||||
state
|
||||
.ecs_mut()
|
||||
.insert(CharacterUpdater::new(settings.persistence_db_dir.clone()));
|
||||
@ -275,11 +284,12 @@ impl Server {
|
||||
|
||||
let mut metrics = ServerMetrics::new();
|
||||
// register all metrics submodules here
|
||||
let tick_metrics = TickMetrics::new(metrics.tick_clone())
|
||||
let (tick_metrics, registry_tick) = TickMetrics::new(metrics.tick_clone())
|
||||
.expect("Failed to initialize server tick metrics submodule.");
|
||||
tick_metrics
|
||||
.register(&metrics.registry())
|
||||
.expect("failed to register tick metrics");
|
||||
registry_chunk(&metrics.registry()).expect("failed to register chunk gen metrics");
|
||||
registry_network(&metrics.registry()).expect("failed to register network request metrics");
|
||||
registry_player(&metrics.registry()).expect("failed to register player metrics");
|
||||
registry_tick(&metrics.registry()).expect("failed to register tick metrics");
|
||||
|
||||
let thread_pool = ThreadPoolBuilder::new()
|
||||
.name("veloren-worker".to_string())
|
||||
@ -646,9 +656,6 @@ impl Server {
|
||||
.set(stats_persistence_nanos);
|
||||
|
||||
// Report other info
|
||||
self.tick_metrics
|
||||
.player_online
|
||||
.set(self.state.ecs().read_storage::<Client>().join().count() as i64);
|
||||
self.tick_metrics
|
||||
.time_of_day
|
||||
.set(self.state.ecs().read_resource::<TimeOfDay>().0);
|
||||
@ -813,7 +820,9 @@ impl Server {
|
||||
.is_some()
|
||||
}
|
||||
|
||||
pub fn number_of_players(&self) -> i64 { self.tick_metrics.player_online.get() }
|
||||
pub fn number_of_players(&self) -> i64 {
|
||||
self.state.ecs().read_storage::<Client>().join().count() as i64
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Server {
|
||||
|
@ -1,4 +1,6 @@
|
||||
use prometheus::{Encoder, Gauge, IntGauge, IntGaugeVec, Opts, Registry, TextEncoder};
|
||||
use prometheus::{
|
||||
Encoder, Gauge, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, Opts, Registry, TextEncoder,
|
||||
};
|
||||
use std::{
|
||||
convert::TryInto,
|
||||
error::Error,
|
||||
@ -12,10 +14,28 @@ use std::{
|
||||
};
|
||||
use tracing::{debug, error};
|
||||
|
||||
type RegistryFn = Box<dyn FnOnce(&Registry) -> Result<(), prometheus::Error>>;
|
||||
|
||||
pub struct PlayerMetrics {
|
||||
pub players_connected: IntCounter,
|
||||
pub players_disconnected: IntCounterVec, // timeout, network_error, gracefully
|
||||
}
|
||||
|
||||
pub struct NetworkRequestMetrics {
|
||||
pub chunks_request_dropped: IntCounter,
|
||||
pub chunks_served_from_cache: IntCounter,
|
||||
pub chunks_generation_triggered: IntCounter,
|
||||
}
|
||||
|
||||
pub struct ChunkGenMetrics {
|
||||
pub chunks_requested: IntCounter,
|
||||
pub chunks_served: IntCounter,
|
||||
pub chunks_canceled: IntCounter,
|
||||
}
|
||||
|
||||
pub struct TickMetrics {
|
||||
pub chonks_count: IntGauge,
|
||||
pub chunks_count: IntGauge,
|
||||
pub player_online: IntGauge,
|
||||
pub entity_count: IntGauge,
|
||||
pub tick_time: IntGaugeVec,
|
||||
pub build_info: IntGauge,
|
||||
@ -32,11 +52,122 @@ pub struct ServerMetrics {
|
||||
tick: Arc<AtomicU64>,
|
||||
}
|
||||
|
||||
impl PlayerMetrics {
|
||||
pub fn new() -> Result<(Self, RegistryFn), prometheus::Error> {
|
||||
let players_connected = IntCounter::with_opts(Opts::new(
|
||||
"players_connected",
|
||||
"shows the number of clients joined to the server",
|
||||
))?;
|
||||
let players_disconnected = IntCounterVec::new(
|
||||
Opts::new(
|
||||
"players_disconnected",
|
||||
"shows the number of clients disconnected from the server and the reason",
|
||||
),
|
||||
&["reason"],
|
||||
)?;
|
||||
|
||||
let players_connected_clone = players_connected.clone();
|
||||
let players_disconnected_clone = players_disconnected.clone();
|
||||
|
||||
let f = |registry: &Registry| {
|
||||
registry.register(Box::new(players_connected_clone))?;
|
||||
registry.register(Box::new(players_disconnected_clone))?;
|
||||
Ok(())
|
||||
};
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
players_connected,
|
||||
players_disconnected,
|
||||
},
|
||||
Box::new(f),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl NetworkRequestMetrics {
|
||||
pub fn new() -> Result<(Self, RegistryFn), prometheus::Error> {
|
||||
let chunks_request_dropped = IntCounter::with_opts(Opts::new(
|
||||
"chunks_request_dropped",
|
||||
"number of all chunk request dropped, e.g because the player was to far away",
|
||||
))?;
|
||||
let chunks_served_from_cache = IntCounter::with_opts(Opts::new(
|
||||
"chunks_served_from_cache",
|
||||
"number of all requested chunks already generated and could be served out of cache",
|
||||
))?;
|
||||
let chunks_generation_triggered = IntCounter::with_opts(Opts::new(
|
||||
"chunks_generation_triggered",
|
||||
"number of all chunks that were requested and needs to be generated",
|
||||
))?;
|
||||
|
||||
let chunks_request_dropped_clone = chunks_request_dropped.clone();
|
||||
let chunks_served_from_cache_clone = chunks_served_from_cache.clone();
|
||||
let chunks_generation_triggered_clone = chunks_generation_triggered.clone();
|
||||
|
||||
let f = |registry: &Registry| {
|
||||
registry.register(Box::new(chunks_request_dropped_clone))?;
|
||||
registry.register(Box::new(chunks_served_from_cache_clone))?;
|
||||
registry.register(Box::new(chunks_generation_triggered_clone))?;
|
||||
Ok(())
|
||||
};
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
chunks_request_dropped,
|
||||
chunks_served_from_cache,
|
||||
chunks_generation_triggered,
|
||||
},
|
||||
Box::new(f),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl ChunkGenMetrics {
|
||||
pub fn new() -> Result<(Self, RegistryFn), prometheus::Error> {
|
||||
let chunks_requested = IntCounter::with_opts(Opts::new(
|
||||
"chunks_requested",
|
||||
"number of all chunks requested on the server",
|
||||
))?;
|
||||
let chunks_served = IntCounter::with_opts(Opts::new(
|
||||
"chunks_served",
|
||||
"number of all requested chunks already served on the server",
|
||||
))?;
|
||||
let chunks_canceled = IntCounter::with_opts(Opts::new(
|
||||
"chunks_canceled",
|
||||
"number of all canceled chunks on the server",
|
||||
))?;
|
||||
|
||||
let chunks_requested_clone = chunks_requested.clone();
|
||||
let chunks_served_clone = chunks_served.clone();
|
||||
let chunks_canceled_clone = chunks_canceled.clone();
|
||||
|
||||
let f = |registry: &Registry| {
|
||||
registry.register(Box::new(chunks_requested_clone))?;
|
||||
registry.register(Box::new(chunks_served_clone))?;
|
||||
registry.register(Box::new(chunks_canceled_clone))?;
|
||||
Ok(())
|
||||
};
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
chunks_requested,
|
||||
chunks_served,
|
||||
chunks_canceled,
|
||||
},
|
||||
Box::new(f),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl TickMetrics {
|
||||
pub fn new(tick: Arc<AtomicU64>) -> Result<Self, Box<dyn Error>> {
|
||||
let player_online = IntGauge::with_opts(Opts::new(
|
||||
"player_online",
|
||||
"shows the number of clients connected to the server",
|
||||
pub fn new(tick: Arc<AtomicU64>) -> Result<(Self, RegistryFn), Box<dyn Error>> {
|
||||
let chonks_count = IntGauge::with_opts(Opts::new(
|
||||
"chonks_count",
|
||||
"number of all chonks currently active on the server",
|
||||
))?;
|
||||
let chunks_count = IntGauge::with_opts(Opts::new(
|
||||
"chunks_count",
|
||||
"number of all chunks currently active on the server",
|
||||
))?;
|
||||
let entity_count = IntGauge::with_opts(Opts::new(
|
||||
"entity_count",
|
||||
@ -56,14 +187,6 @@ impl TickMetrics {
|
||||
"light_count",
|
||||
"number of all lights currently active on the server",
|
||||
))?;
|
||||
let chonks_count = IntGauge::with_opts(Opts::new(
|
||||
"chonks_count",
|
||||
"number of all chonks currently active on the server",
|
||||
))?;
|
||||
let chunks_count = IntGauge::with_opts(Opts::new(
|
||||
"chunks_count",
|
||||
"number of all chunks currently active on the server",
|
||||
))?;
|
||||
let tick_time = IntGaugeVec::new(
|
||||
Opts::new("tick_time", "time in ns required for a tick of the server"),
|
||||
&["period"],
|
||||
@ -74,30 +197,41 @@ impl TickMetrics {
|
||||
.expect("Time went backwards");
|
||||
start_time.set(since_the_epoch.as_secs().try_into()?);
|
||||
|
||||
Ok(Self {
|
||||
chonks_count,
|
||||
chunks_count,
|
||||
player_online,
|
||||
entity_count,
|
||||
tick_time,
|
||||
build_info,
|
||||
start_time,
|
||||
time_of_day,
|
||||
light_count,
|
||||
tick,
|
||||
})
|
||||
}
|
||||
let chonks_count_clone = chonks_count.clone();
|
||||
let chunks_count_clone = chunks_count.clone();
|
||||
let entity_count_clone = entity_count.clone();
|
||||
let build_info_clone = build_info.clone();
|
||||
let start_time_clone = start_time.clone();
|
||||
let time_of_day_clone = time_of_day.clone();
|
||||
let light_count_clone = light_count.clone();
|
||||
let tick_time_clone = tick_time.clone();
|
||||
|
||||
pub fn register(&self, registry: &Registry) -> Result<(), Box<dyn Error>> {
|
||||
registry.register(Box::new(self.player_online.clone()))?;
|
||||
registry.register(Box::new(self.entity_count.clone()))?;
|
||||
registry.register(Box::new(self.build_info.clone()))?;
|
||||
registry.register(Box::new(self.start_time.clone()))?;
|
||||
registry.register(Box::new(self.time_of_day.clone()))?;
|
||||
registry.register(Box::new(self.chonks_count.clone()))?;
|
||||
registry.register(Box::new(self.chunks_count.clone()))?;
|
||||
registry.register(Box::new(self.tick_time.clone()))?;
|
||||
Ok(())
|
||||
let f = |registry: &Registry| {
|
||||
registry.register(Box::new(chonks_count_clone))?;
|
||||
registry.register(Box::new(chunks_count_clone))?;
|
||||
registry.register(Box::new(entity_count_clone))?;
|
||||
registry.register(Box::new(build_info_clone))?;
|
||||
registry.register(Box::new(start_time_clone))?;
|
||||
registry.register(Box::new(time_of_day_clone))?;
|
||||
registry.register(Box::new(light_count_clone))?;
|
||||
registry.register(Box::new(tick_time_clone))?;
|
||||
Ok(())
|
||||
};
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
chonks_count,
|
||||
chunks_count,
|
||||
entity_count,
|
||||
tick_time,
|
||||
build_info,
|
||||
start_time,
|
||||
time_of_day,
|
||||
light_count,
|
||||
tick,
|
||||
},
|
||||
Box::new(f),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn is_100th_tick(&self) -> bool { self.tick.load(Ordering::Relaxed).rem_euclid(100) == 0 }
|
||||
|
@ -1,7 +1,11 @@
|
||||
use super::SysTimer;
|
||||
use crate::{
|
||||
alias_validator::AliasValidator, client::Client, login_provider::LoginProvider,
|
||||
persistence::character::CharacterLoader, ServerSettings,
|
||||
alias_validator::AliasValidator,
|
||||
client::Client,
|
||||
login_provider::LoginProvider,
|
||||
metrics::{NetworkRequestMetrics, PlayerMetrics},
|
||||
persistence::character::CharacterLoader,
|
||||
ServerSettings,
|
||||
};
|
||||
use common::{
|
||||
comp::{
|
||||
@ -43,6 +47,8 @@ impl Sys {
|
||||
cnt: &mut u64,
|
||||
character_loader: &ReadExpect<'_, CharacterLoader>,
|
||||
terrain: &ReadExpect<'_, TerrainGrid>,
|
||||
network_metrics: &ReadExpect<'_, NetworkRequestMetrics>,
|
||||
player_metrics: &ReadExpect<'_, PlayerMetrics>,
|
||||
uids: &ReadStorage<'_, Uid>,
|
||||
can_build: &ReadStorage<'_, CanBuild>,
|
||||
force_updates: &ReadStorage<'_, ForceUpdate>,
|
||||
@ -133,6 +139,7 @@ impl Sys {
|
||||
|
||||
// Add to list to notify all clients of the new player
|
||||
new_players.push(entity);
|
||||
player_metrics.players_connected.inc();
|
||||
},
|
||||
// Use RequestState instead (No need to send `player` again).
|
||||
_ => client.error_state(RequestStateError::Impossible),
|
||||
@ -312,6 +319,7 @@ impl Sys {
|
||||
},
|
||||
ClientMsg::TerrainChunkRequest { key } => match client.client_state {
|
||||
ClientState::Connected | ClientState::Registered => {
|
||||
network_metrics.chunks_request_dropped.inc();
|
||||
client.error_state(RequestStateError::Impossible);
|
||||
},
|
||||
ClientState::Spectator | ClientState::Character => {
|
||||
@ -329,12 +337,20 @@ impl Sys {
|
||||
};
|
||||
if in_vd {
|
||||
match terrain.get_key(key) {
|
||||
Some(chunk) => client.notify(ServerMsg::TerrainChunkUpdate {
|
||||
key,
|
||||
chunk: Ok(Box::new(chunk.clone())),
|
||||
}),
|
||||
None => server_emitter.emit(ServerEvent::ChunkRequest(entity, key)),
|
||||
Some(chunk) => {
|
||||
network_metrics.chunks_served_from_cache.inc();
|
||||
client.notify(ServerMsg::TerrainChunkUpdate {
|
||||
key,
|
||||
chunk: Ok(Box::new(chunk.clone())),
|
||||
})
|
||||
},
|
||||
None => {
|
||||
network_metrics.chunks_generation_triggered.inc();
|
||||
server_emitter.emit(ServerEvent::ChunkRequest(entity, key))
|
||||
},
|
||||
}
|
||||
} else {
|
||||
network_metrics.chunks_request_dropped.inc();
|
||||
}
|
||||
},
|
||||
ClientState::Pending => {},
|
||||
@ -347,6 +363,10 @@ impl Sys {
|
||||
},
|
||||
ClientMsg::Terminate => {
|
||||
debug!(?entity, "Client send message to termitate session");
|
||||
player_metrics
|
||||
.players_disconnected
|
||||
.with_label_values(&["gracefully"])
|
||||
.inc();
|
||||
server_emitter.emit(ServerEvent::ClientDisconnect(entity));
|
||||
break Ok(());
|
||||
},
|
||||
@ -408,6 +428,8 @@ impl<'a> System<'a> for Sys {
|
||||
Read<'a, Time>,
|
||||
ReadExpect<'a, CharacterLoader>,
|
||||
ReadExpect<'a, TerrainGrid>,
|
||||
ReadExpect<'a, NetworkRequestMetrics>,
|
||||
ReadExpect<'a, PlayerMetrics>,
|
||||
Write<'a, SysTimer<Self>>,
|
||||
ReadStorage<'a, Uid>,
|
||||
ReadStorage<'a, CanBuild>,
|
||||
@ -439,6 +461,8 @@ impl<'a> System<'a> for Sys {
|
||||
time,
|
||||
character_loader,
|
||||
terrain,
|
||||
network_metrics,
|
||||
player_metrics,
|
||||
mut timer,
|
||||
uids,
|
||||
can_build,
|
||||
@ -489,9 +513,7 @@ impl<'a> System<'a> for Sys {
|
||||
|
||||
let network_err: Result<(), crate::error::Error> = block_on(async {
|
||||
//TIMEOUT 0.02 ms for msg handling
|
||||
select!(
|
||||
_ = Delay::new(std::time::Duration::from_micros(20)).fuse() => Ok(()),
|
||||
err = Self::handle_client_msg(
|
||||
let work_future = Self::handle_client_msg(
|
||||
&mut server_emitter,
|
||||
&mut new_chat_msgs,
|
||||
&player_list,
|
||||
@ -499,9 +521,10 @@ impl<'a> System<'a> for Sys {
|
||||
entity,
|
||||
client,
|
||||
&mut cnt,
|
||||
|
||||
&character_loader,
|
||||
&terrain,
|
||||
&network_metrics,
|
||||
&player_metrics,
|
||||
&uids,
|
||||
&can_build,
|
||||
&force_updates,
|
||||
@ -518,7 +541,10 @@ impl<'a> System<'a> for Sys {
|
||||
&mut controllers,
|
||||
&settings,
|
||||
&alias_validator,
|
||||
).fuse() => err,
|
||||
);
|
||||
select!(
|
||||
_ = Delay::new(std::time::Duration::from_micros(20)).fuse() => Ok(()),
|
||||
err = work_future.fuse() => err,
|
||||
)
|
||||
});
|
||||
|
||||
@ -529,11 +555,19 @@ impl<'a> System<'a> for Sys {
|
||||
// Timeout
|
||||
{
|
||||
info!(?entity, "timeout error with client, disconnecting");
|
||||
player_metrics
|
||||
.players_disconnected
|
||||
.with_label_values(&["timeout"])
|
||||
.inc();
|
||||
server_emitter.emit(ServerEvent::ClientDisconnect(entity));
|
||||
} else if network_err.is_err()
|
||||
// Postbox error
|
||||
{
|
||||
debug!(?entity, "postbox error with client, disconnecting");
|
||||
player_metrics
|
||||
.players_disconnected
|
||||
.with_label_values(&["network_error"])
|
||||
.inc();
|
||||
server_emitter.emit(ServerEvent::ClientDisconnect(entity));
|
||||
} else if time.0 - client.last_ping > settings.client_timeout.as_secs() as f64 * 0.5 {
|
||||
// Try pinging the client if the timeout is nearing.
|
||||
|
Loading…
Reference in New Issue
Block a user