Merge branch 'feature/openmetrics-implementation' into 'dev'

Implementation of OpenMetrics

See merge request crafty-controller/crafty-4!624
This commit is contained in:
Iain Powrie 2023-10-08 22:11:27 +00:00
commit 99f17c1532
13 changed files with 239 additions and 1 deletions

View File

@ -3,6 +3,7 @@
### New features
- Finish and Activate Arcadia notification backend ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/621) | [Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/626) | [Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/632))
- Add initial Webhook Notification (Discord, Mattermost, Slack, Teams) ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/594))
- Implementation of OpenMetrics endpoints, for use with services such as Prometheus ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/624))
### Bug fixes
- PWA: Removed the custom offline page in favour of browser default ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/607))
- Fix hidden servers appearing visible on public mobile status page ([Merge Request](https://gitlab.com/crafty-controller/crafty-4/-/merge_requests/612))

View File

@ -1,6 +1,8 @@
import logging
import queue
from prometheus_client import CollectorRegistry, Gauge
from app.classes.models.management import HelpersManagement, HelpersWebhooks
from app.classes.models.servers import HelperServers
@ -11,6 +13,8 @@ class ManagementController:
def __init__(self, management_helper):
    """Store the management helper and set up the queue and host metrics.

    Args:
        management_helper: Helper object kept on the instance as
            ``management_helper``.
    """
    self.command_queue = queue.Queue()
    self.management_helper = management_helper
    # The registry must exist before the gauges are built, because
    # init_host_registries() registers them against it.
    self.host_registry = CollectorRegistry()
    self.init_host_registries()
# **********************************************************************************
# Config Methods
@ -54,6 +58,19 @@ class ManagementController:
def add_crafty_row():
HelpersManagement.create_crafty_row()
def init_host_registries(self):
    """Create the host-level gauges and register them on the host registry.

    The gauges are stored as ``self.cpu_usage`` and
    ``self.mem_usage_percent``; both are unlabelled and attached to
    ``self.host_registry``.
    """
    host_registry = self.host_registry
    self.cpu_usage = Gauge(
        "CPU_Usage",
        "The CPU usage of the server",
        registry=host_registry,
    )
    self.mem_usage_percent = Gauge(
        "Mem_Usage",
        "The Memory usage of the server",
        registry=host_registry,
    )
# **********************************************************************************
# Commands Methods
# **********************************************************************************

View File

@ -8,6 +8,7 @@ from app.classes.shared.helpers import Helpers
from app.classes.shared.main_models import DatabaseShortcuts
from app.classes.shared.migration import MigrationManager
try:
from peewee import (
SqliteDatabase,

View File

@ -21,6 +21,9 @@ from tzlocal import get_localzone
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.base import JobLookupError, ConflictingIdError
# OpenMetrics/Prometheus Imports
from prometheus_client import CollectorRegistry, Gauge, Info
from app.classes.minecraft.stats import Stats
from app.classes.minecraft.mc_ping import ping, ping_bedrock
from app.classes.models.servers import HelperServers, Servers
@ -175,6 +178,8 @@ class ServerInstance:
self.server_object = HelperServers.get_server_obj(self.server_id)
self.stats_helper = HelperServerStats(self.server_id)
self.last_backup_failed = False
self.server_registry = CollectorRegistry()
try:
with open(
os.path.join(self.server_object.path, "db_stats", "players_cache.json"),
@ -194,6 +199,7 @@ class ServerInstance:
self.tz = ZoneInfo("Europe/London")
self.server_scheduler = BackgroundScheduler(timezone=str(self.tz))
self.dir_scheduler = BackgroundScheduler(timezone=str(self.tz))
self.init_registries()
self.server_scheduler.start()
self.dir_scheduler.start()
self.start_dir_calc_task()
@ -1850,6 +1856,15 @@ class ServerInstance:
server_stats = self.get_servers_stats()
self.stats_helper.insert_server_stats(server_stats)
self.cpu_usage.labels(f"{self.server_id}").set(server_stats.get("cpu"))
self.mem_usage_percent.labels(f"{self.server_id}").set(
server_stats.get("mem_percent")
)
self.minecraft_version.labels(f"{self.server_id}").info(
{"version": f"{server_stats.get('version')}"}
)
self.online_players.labels(f"{self.server_id}").set(server_stats.get("online"))
# delete old data
max_age = self.helper.get_setting("history_max_age")
now = datetime.datetime.now()
@ -1857,6 +1872,34 @@ class ServerInstance:
self.stats_helper.remove_old_stats(minimum_to_exist)
def init_registries(self):
    """Create the per-server metrics and register them on the server registry.

    Sets ``self.cpu_usage``, ``self.mem_usage_percent``,
    ``self.minecraft_version`` and ``self.online_players``. Every metric
    is labelled by ``server_id`` and attached to ``self.server_registry``.
    """
    # All four metrics share the same label set and target registry.
    shared = {"labelnames": ["server_id"], "registry": self.server_registry}

    self.cpu_usage = Gauge("CPU_Usage", "The CPU usage of the server", **shared)
    self.mem_usage_percent = Gauge(
        "Mem_Usage", "The Memory usage of the server", **shared
    )
    self.minecraft_version = Info(
        "Minecraft_Version",
        "The version of the minecraft of this server",
        **shared,
    )
    self.online_players = Gauge(
        "online_players",
        "The number of players online for a server",
        **shared,
    )
def get_server_history(self):
history = self.stats_helper.get_history_stats(self.server_id, 1)
return history

View File

@ -696,6 +696,12 @@ class TasksManager:
# Stats are different
host_stats = HelpersManagement.get_latest_hosts_stats()
self.controller.management.cpu_usage.set(host_stats.get("cpu_usage"))
self.controller.management.mem_usage_percent.set(
host_stats.get("mem_percent")
)
if len(WebSocketManager().clients) > 0:
# There are clients
try:

View File

@ -11,7 +11,7 @@ from app.classes.shared.helpers import Helpers
from app.classes.shared.file_helpers import FileHelpers
from app.classes.shared.main_controller import Controller
from app.classes.shared.translation import Translation
from app.classes.models.management import DatabaseShortcuts
from app.classes.shared.main_models import DatabaseShortcuts
logger = logging.getLogger(__name__)

View File

@ -0,0 +1,53 @@
import logging
import typing as t
from prometheus_client import REGISTRY, CollectorRegistry
from prometheus_client.exposition import _bake_output
from prometheus_client.exposition import parse_qs, urlparse
from app.classes.web.base_api_handler import BaseApiHandler
logger = logging.getLogger(__name__)
class BaseMetricsHandler(BaseApiHandler):
    """HTTP handler that serves the metrics held in a Prometheus registry.

    By default the process-wide ``REGISTRY`` is exposed. Use ``factory()``
    to obtain a subclass bound to another registry, or override
    ``get_registry()`` in a subclass.
    """

    # Registry rendered by get_registry(). The default collectors are left
    # registered on REGISTRY; nothing here unregisters them.
    registry: CollectorRegistry = REGISTRY

    def get_registry(self) -> None:
        """Render ``self.registry`` and write it as the HTTP response."""
        accept_header = self.request.headers.get("Accept")
        accept_encoding_header = self.request.headers.get("Accept-Encoding")
        # Parse the query from the full URI. ``request.path`` holds only the
        # path portion, so parsing it always yielded empty params and any
        # ``name[]`` filter sent by the client was ignored.
        params = parse_qs(urlparse(self.request.uri).query)

        status, headers, output = _bake_output(
            self.registry, accept_header, accept_encoding_header, params, False
        )

        # ``status`` looks like "200 OK"; only the numeric code is needed.
        self.finish_metrics(int(status.split(" ", maxsplit=1)[0]), headers, output)

    @classmethod
    def factory(cls, registry: CollectorRegistry) -> type:
        """Return a dynamic handler class tied to the passed registry.

        Args:
            registry: Registry the returned class will expose.

        Returns:
            A subclass of ``cls`` with the same name whose ``registry``
            class attribute is ``registry``.
        """
        return type(cls.__name__, (cls,), {"registry": registry})

    def finish_metrics(self, status: int, headers, data: bytes):
        """Send an already-rendered metrics payload to the client.

        Args:
            status: HTTP status code to respond with.
            headers: Iterable of ``(name, value)`` header pairs, as produced
                by ``_bake_output``.
            data: Encoded exposition payload to write as the body.
        """
        self.set_status(status)
        # Fallback content type; the baked headers below normally replace it.
        self.set_header("Content-Type", "text/plain")
        for header in headers:
            self.set_header(*header)
        self.finish(data)

View File

@ -0,0 +1,31 @@
from prometheus_client.exposition import _bake_output
from prometheus_client.exposition import parse_qs, urlparse
from app.classes.web.metrics_handler import BaseMetricsHandler
# Decorate function with metric.
class ApiOpenMetricsCraftyHandler(BaseMetricsHandler):
    """Serve the host metrics registry of the management controller."""

    def get(self):
        """Authenticate the caller, then respond with the host metrics."""
        auth_data = self.authenticate_user()
        if not auth_data:
            return

        # NOTE(review): auth_data[3] is presumably the superuser flag, not a
        # per-server permission; confirm against authenticate_user().
        if not auth_data[3]:
            return self.finish_json(400, {"status": "error", "error": "NOT_AUTHORIZED"})

        self.get_registry()

    def get_registry(self) -> None:
        """Render the host registry and write it as the HTTP response."""
        registry = self.controller.management.host_registry
        accept_header = self.request.headers.get("Accept")
        accept_encoding_header = self.request.headers.get("Accept-Encoding")
        # Parse the query from the full URI. ``request.path`` holds only the
        # path portion, so parsing it always yielded empty params.
        params = parse_qs(urlparse(self.request.uri).query)

        status, headers, output = _bake_output(
            registry, accept_header, accept_encoding_header, params, False
        )

        # ``status`` looks like "200 OK"; only the numeric code is needed.
        self.finish_metrics(int(status.split(" ", maxsplit=1)[0]), headers, output)

View File

@ -0,0 +1,21 @@
from prometheus_client import Info
from app.classes.web.metrics_handler import BaseMetricsHandler
CRAFTY_INFO = Info("Crafty_Controller", "Infos of this Crafty Instance")
# Decorate function with metric.
class ApiOpenMetricsIndexHandler(BaseMetricsHandler):
    """Serve the default registry, including the Crafty instance info metric."""

    def get(self):
        """Authenticate the caller, refresh ``CRAFTY_INFO`` and send metrics."""
        auth_data = self.authenticate_user()
        if not auth_data:
            return

        # Join the parts explicitly. The previous backslash-continued
        # f-string embedded the next line's leading whitespace, producing
        # versions such as "4   .9   .5".
        version_data = self.helper.get_version()
        version = ".".join(
            str(version_data.get(part)) for part in ("major", "minor", "sub")
        )
        CRAFTY_INFO.info(
            {"version": version, "docker": f"{self.helper.is_env_docker()}"}
        )
        self.get_registry()

View File

@ -0,0 +1,24 @@
from app.classes.web.routes.metrics.index import ApiOpenMetricsIndexHandler
from app.classes.web.routes.metrics.host import ApiOpenMetricsCraftyHandler
from app.classes.web.routes.metrics.servers import ApiOpenMetricsServersHandler
def metrics_handlers(handler_args):
    """Build the route entries for the OpenMetrics endpoints.

    Args:
        handler_args: Object placed as the third element of every route
            entry.

    Returns:
        A list of ``(pattern, handler_class, handler_args)`` tuples for the
        index, host and per-server metrics routes, in that order.
    """
    # OpenMetrics routes
    routes = (
        (r"/metrics/?", ApiOpenMetricsIndexHandler),
        (r"/metrics/host/?", ApiOpenMetricsCraftyHandler),
        (r"/metrics/servers/([0-9]+)/?", ApiOpenMetricsServersHandler),
    )
    return [(pattern, handler, handler_args) for pattern, handler in routes]

View File

@ -0,0 +1,37 @@
from prometheus_client.exposition import _bake_output
from prometheus_client.exposition import parse_qs, urlparse
from app.classes.web.metrics_handler import BaseMetricsHandler
from app.classes.controllers.servers_controller import ServersController
# Decorate function with metric.
class ApiOpenMetricsServersHandler(BaseMetricsHandler):
    """Serve the metrics registry of a single server instance."""

    def get(self, server_id: str):
        """Authenticate the caller and respond with one server's metrics.

        Args:
            server_id: Server id captured from the request URL.
        """
        auth_data = self.authenticate_user()
        if not auth_data:
            return

        # auth_data[0] is iterated as the servers the caller may access.
        if not any(str(x["server_id"]) == server_id for x in auth_data[0]):
            # if the user doesn't have access to the server, return an error
            return self.finish_json(400, {"status": "error", "error": "NOT_AUTHORIZED"})

        self.get_registry(server_id)

    def get_registry(self, server_id=None) -> None:
        """Render the given server's registry and write it as the response.

        Args:
            server_id: Id of the server whose ``server_registry`` is
                exposed. ``None`` yields a 500 ``UNKNOWN_SERVER`` error.
        """
        if server_id is None:
            return self.finish_json(500, {"status": "error", "error": "UNKNOWN_SERVER"})

        registry = (
            ServersController().get_server_instance_by_id(server_id).server_registry
        )
        accept_header = self.request.headers.get("Accept")
        accept_encoding_header = self.request.headers.get("Accept-Encoding")
        # Parse the query from the full URI. ``request.path`` holds only the
        # path portion, so parsing it always yielded empty params.
        params = parse_qs(urlparse(self.request.uri).query)

        status, headers, output = _bake_output(
            registry, accept_header, accept_encoding_header, params, False
        )

        # ``status`` looks like "200 OK"; only the numeric code is needed.
        self.finish_metrics(int(status.split(" ", maxsplit=1)[0]), headers, output)

View File

@ -20,6 +20,7 @@ from app.classes.web.public_handler import PublicHandler
from app.classes.web.panel_handler import PanelHandler
from app.classes.web.default_handler import DefaultHandler
from app.classes.web.routes.api.api_handlers import api_handlers
from app.classes.web.routes.metrics.metrics_handlers import metrics_handlers
from app.classes.web.server_handler import ServerHandler
from app.classes.web.api_handler import (
ServersStats,
@ -174,6 +175,8 @@ class Webserver:
(r"/api/v1/users/delete_user", DeleteUser, handler_args),
# API Routes V2
*api_handlers(handler_args),
# API Routes OpenMetrics
*metrics_handlers(handler_args),
# Using this one at the end
# to catch all the other requests to Public Handler
(r"/(.*)", PublicHandler, handler_args),

View File

@ -19,3 +19,4 @@ tornado==6.3.3
tzlocal==5.1
jsonschema==4.19.1
orjson==3.9.7
prometheus-client==0.17.1