Improve OpenMetrics Registries Work

This commit is contained in:
Silversthorn 2023-10-07 23:50:56 +02:00
parent eb6d9560f8
commit 49914c7f14
8 changed files with 144 additions and 17 deletions

View File

@ -1,6 +1,8 @@
import logging
import queue
from prometheus_client import CollectorRegistry, Gauge
from app.classes.models.management import HelpersManagement
from app.classes.models.servers import HelperServers
@ -11,6 +13,8 @@ class ManagementController:
def __init__(self, management_helper):
    """Set up the controller's helper, command queue and host metrics registry.

    Args:
        management_helper: Helper object stored on the instance as
            ``self.management_helper``.
    """
    # The registry has to exist before init_host_registries() runs, because
    # that method registers its gauges on self.host_registry.
    self.host_registry = CollectorRegistry()
    self.command_queue = queue.Queue()
    self.management_helper = management_helper
    self.init_host_registries()
# **********************************************************************************
# Config Methods
@ -54,6 +58,19 @@ class ManagementController:
def add_crafty_row():
    """Delegate to ``HelpersManagement.create_crafty_row()``."""
    # NOTE(review): takes no ``self`` — presumably declared as a static method
    # just above this line; confirm against the full file.
    HelpersManagement.create_crafty_row()
def init_host_registries(self):
    """Create the host gauges and register them on ``self.host_registry``.

    Sets ``self.cpu_usage`` and ``self.mem_usage_percent``. Neither gauge
    declares any label names.
    """
    registry = self.host_registry

    # REGISTRY entries backing the host stats
    self.cpu_usage = Gauge(
        "CPU_Usage",
        "The CPU usage of the server",
        registry=registry,
    )
    self.mem_usage_percent = Gauge(
        "Mem_Usage",
        "The Memory usage of the server",
        registry=registry,
    )
# **********************************************************************************
# Commands Methods
# **********************************************************************************

View File

@ -3,8 +3,6 @@ import logging
import datetime
from datetime import timedelta
from prometheus_client import Gauge
from app.classes.models.servers import Servers, HelperServers
from app.classes.shared.helpers import Helpers
from app.classes.shared.main_models import DatabaseShortcuts
@ -32,13 +30,6 @@ logger = logging.getLogger(__name__)
peewee_logger = logging.getLogger("peewee")
peewee_logger.setLevel(logging.INFO)
# REGISTRY Entries for Server Stats functions
ONLINE_PLAYERS = Gauge(
name="online_players",
documentation="The number of players online for a server",
labelnames=["server_id"],
)
# **********************************************************************************
# Servers Stats Class
@ -167,8 +158,6 @@ class HelperServerStats:
self.database.connect(reuse_if_open=True)
server_id = server_stats.get("id", 0)
ONLINE_PLAYERS.labels(f"{self.server_id}").set(server_stats.get("online"))
if server_id == 0:
logger.warning("Stats saving failed with error: Server unknown (id = 0)")
return

View File

@ -21,6 +21,9 @@ from tzlocal.utils import ZoneInfoNotFoundError
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.base import JobLookupError
# OpenMetrics/Prometheus Imports
from prometheus_client import CollectorRegistry, Gauge, Info
from app.classes.minecraft.stats import Stats
from app.classes.minecraft.mc_ping import ping, ping_bedrock
from app.classes.models.servers import HelperServers, Servers
@ -133,6 +136,8 @@ class ServerInstance:
self.server_object = HelperServers.get_server_obj(self.server_id)
self.stats_helper = HelperServerStats(self.server_id)
self.last_backup_failed = False
self.server_registry = CollectorRegistry()
try:
with open(
os.path.join(self.server_object.path, "db_stats", "players_cache.json"),
@ -152,6 +157,7 @@ class ServerInstance:
self.tz = ZoneInfo("Europe/London")
self.server_scheduler = BackgroundScheduler(timezone=str(self.tz))
self.dir_scheduler = BackgroundScheduler(timezone=str(self.tz))
self.init_registries()
self.server_scheduler.start()
self.dir_scheduler.start()
self.start_dir_calc_task()
@ -1799,9 +1805,46 @@ class ServerInstance:
server_stats = self.get_servers_stats()
self.stats_helper.insert_server_stats(server_stats)
self.cpu_usage.labels(f"{self.server_id}").set(server_stats.get("cpu"))
self.mem_usage_percent.labels(f"{self.server_id}").set(
server_stats.get("mem_percent")
)
self.minecraft_version.labels(f"{self.server_id}").info(
{"version": f"{server_stats.get('version')}"}
)
self.online_players.labels(f"{self.server_id}").set(server_stats.get("online"))
# delete old data
max_age = self.helper.get_setting("history_max_age")
now = datetime.datetime.now()
minimum_to_exist = now - datetime.timedelta(days=max_age)
self.stats_helper.remove_old_stats(minimum_to_exist)
def init_registries(self):
    """Create this server's metrics and register them on ``self.server_registry``.

    Sets ``self.cpu_usage``, ``self.mem_usage_percent``,
    ``self.minecraft_version`` and ``self.online_players``. Every metric
    declares a single ``server_id`` label.
    """
    registry = self.server_registry
    label_names = ["server_id"]

    # REGISTRY entries backing the server stats
    self.cpu_usage = Gauge(
        "CPU_Usage",
        "The CPU usage of the server",
        labelnames=label_names,
        registry=registry,
    )
    self.mem_usage_percent = Gauge(
        "Mem_Usage",
        "The Memory usage of the server",
        labelnames=label_names,
        registry=registry,
    )
    self.minecraft_version = Info(
        "Minecraft_Version",
        "The version of the minecraft of this server",
        labelnames=label_names,
        registry=registry,
    )
    self.online_players = Gauge(
        "online_players",
        "The number of players online for a server",
        labelnames=label_names,
        registry=registry,
    )

View File

@ -45,6 +45,7 @@ class TasksManager:
self.helper: Helpers = helper
self.controller: Controller = controller
self.tornado: Webserver = Webserver(helper, controller, self)
try:
self.tz = get_localzone()
except ZoneInfoNotFoundError as e:
@ -688,6 +689,12 @@ class TasksManager:
# Stats are different
host_stats = HelpersManagement.get_latest_hosts_stats()
self.controller.management.cpu_usage.set(host_stats.get("cpu_usage"))
self.controller.management.mem_usage_percent.set(
host_stats.get("mem_percent")
)
if len(self.helper.websocket_helper.clients) > 0:
# There are clients
try:

View File

@ -1,7 +1,13 @@
import logging
import typing as t
from prometheus_client import REGISTRY, CollectorRegistry
from prometheus_client import (
REGISTRY,
CollectorRegistry,
GC_COLLECTOR,
PLATFORM_COLLECTOR,
PROCESS_COLLECTOR,
)
from prometheus_client.exposition import _bake_output
from prometheus_client.exposition import parse_qs, urlparse
@ -14,6 +20,9 @@ class BaseMetricsHandler(BaseApiHandler):
"""HTTP handler that gives metrics from ``REGISTRY``."""
registry: CollectorRegistry = REGISTRY
# registry.unregister(GC_COLLECTOR)
# registry.unregister(PLATFORM_COLLECTOR)
# registry.unregister(PROCESS_COLLECTOR)
def get_registry(self) -> None:
# Prepare parameters

View File

@ -0,0 +1,33 @@
from prometheus_client import Gauge
from prometheus_client.exposition import _bake_output
from prometheus_client.exposition import parse_qs, urlparse
from app.classes.web.metrics_handler import BaseMetricsHandler
from app.classes.controllers.servers_controller import ServersController
# Handler that serves the host registry (controller.management.host_registry) as metrics output.
class ApiOpenMetricsCraftyHandler(BaseMetricsHandler):
    """Metrics endpoint for the host registry held by the management controller."""

    def get(self):
        """Authenticate the caller and, when permitted, write the host metrics.

        Returns without writing metrics when ``authenticate_user()`` yields a
        falsy value, and answers with a ``NOT_AUTHORIZED`` JSON error when
        ``auth_data[3]`` is falsy.
        """
        auth_data = self.authenticate_user()
        if not auth_data:
            return
        # NOTE(review): auth_data[3] is presumably the superuser flag — confirm
        # against authenticate_user().
        if not auth_data[3]:
            return self.finish_json(400, {"status": "error", "error": "NOT_AUTHORIZED"})
        self.get_registry()

    def get_registry(self) -> None:
        """Render ``host_registry`` and send it through ``finish_metrics``."""
        # Prepare parameters
        registry = self.controller.management.host_registry
        accept_header = self.request.headers.get("Accept")
        accept_encoding_header = self.request.headers.get("Accept-Encoding")
        # The request's ``path`` never contains the query string, so parsing it
        # always produced empty params and ``name[]`` filters were silently
        # ignored. Read the query component of the request directly instead.
        params = parse_qs(self.request.query)
        # Bake output
        status, headers, output = _bake_output(
            registry, accept_header, accept_encoding_header, params, False
        )
        # Return output; only the leading numeric code of ``status`` is passed on.
        self.finish_metrics(int(status.split(" ", maxsplit=1)[0]), headers, output)

View File

@ -1,4 +1,5 @@
from app.classes.web.routes.metrics.index import ApiOpenMetricsIndexHandler
from app.classes.web.routes.metrics.host import ApiOpenMetricsCraftyHandler
from app.classes.web.routes.metrics.servers import ApiOpenMetricsServersHandler
@ -6,12 +7,17 @@ def metrics_handlers(handler_args):
return [
# OpenMetrics routes
(
r"/metrics?",
r"/metrics/?",
ApiOpenMetricsIndexHandler,
handler_args,
),
(
r"/metrics/servers/(0-9)+?",
r"/metrics/host/?",
ApiOpenMetricsCraftyHandler,
handler_args,
),
(
r"/metrics/servers/([0-9]+)/?",
ApiOpenMetricsServersHandler,
handler_args,
),

View File

@ -1,12 +1,35 @@
from prometheus_client import Histogram
from prometheus_client import Gauge
from prometheus_client.exposition import _bake_output
from prometheus_client.exposition import parse_qs, urlparse
from app.classes.web.metrics_handler import BaseMetricsHandler
from app.classes.controllers.servers_controller import ServersController
# Handler that serves a single server's registry (ServerInstance.server_registry) as metrics output.
class ApiOpenMetricsServersHandler(BaseMetricsHandler):
    """Metrics endpoint for the registry of one server instance."""

    def get(self, server_id: str):
        """Authenticate the caller and write the metrics of ``server_id``.

        Args:
            server_id: Server identifier as a string; it is compared against
                the stringified ``server_id`` of each entry in ``auth_data[0]``.

        Returns without writing metrics when ``authenticate_user()`` yields a
        falsy value, and answers with a ``NOT_AUTHORIZED`` JSON error when
        ``server_id`` matches none of the entries in ``auth_data[0]``.
        """
        auth_data = self.authenticate_user()
        if not auth_data:
            return
        if not any(str(entry["server_id"]) == server_id for entry in auth_data[0]):
            # if the user doesn't have access to the server, return an error
            return self.finish_json(400, {"status": "error", "error": "NOT_AUTHORIZED"})
        self.get_registry(server_id)

    def get_registry(self, server_id) -> None:
        """Render the server's ``server_registry`` and send it through ``finish_metrics``.

        Args:
            server_id: Identifier passed to
                ``ServersController().get_server_instance_by_id``.
        """
        # Prepare parameters
        # NOTE(review): assumes get_server_instance_by_id() always returns an
        # instance for an authorised id — confirm it cannot return None here.
        registry = (
            ServersController().get_server_instance_by_id(server_id).server_registry
        )
        accept_header = self.request.headers.get("Accept")
        accept_encoding_header = self.request.headers.get("Accept-Encoding")
        # The request's ``path`` never contains the query string, so parsing it
        # always produced empty params and ``name[]`` filters were silently
        # ignored. Read the query component of the request directly instead.
        params = parse_qs(self.request.query)
        # Bake output
        status, headers, output = _bake_output(
            registry, accept_header, accept_encoding_header, params, False
        )
        # Return output; only the leading numeric code of ``status`` is passed on.
        self.finish_metrics(int(status.split(" ", maxsplit=1)[0]), headers, output)