Files
mcma-backend/app/api/health.py
T
2026-06-01 18:47:59 +03:00

84 lines
2.6 KiB
Python

"""Health & readiness endpoints — used by compose healthchecks and the admin UI.
* ``/health`` — liveness: the process is up. Always 200 if serving.
* ``/health/ready`` — readiness: checks DB, Redis, and (optionally) ML.
Returns 503 if a *required* dependency is down. ML is optional — its absence
degrades, never fails, readiness (graceful degradation, see plan §6.5).
"""
import asyncio
from typing import Literal
from fastapi import APIRouter, Response, status
from pydantic import BaseModel
from sqlalchemy import text
from app.core.config import get_settings
from app.core.logging import get_logger
from app.infrastructure.cache import get_redis
from app.infrastructure.db import get_sessionmaker
# Logger for this module, obtained from the project's logging helper.
log = get_logger(__name__)
# Router carrying both health endpoints; tagged "health".
router = APIRouter(tags=["health"])
# Outcome of one dependency check. "ok" and "down" are produced by the DB and
# Redis checks; "skipped" is produced by the ML check when no ML service URL
# is configured.
CheckStatus = Literal["ok", "down", "skipped"]
# A readiness probe must answer fast and never hang — bound every dependency
# check. A check that exceeds this is reported "down".
CHECK_TIMEOUT_SECONDS = 2.0
# Response body of the liveness endpoint: a fixed ``{"status": "ok"}``.
# Documented with comments rather than a class docstring so the generated
# schema for this model is left exactly as it was.
class HealthResponse(BaseModel):
    # The single permitted value is also the default, so the model can be
    # constructed with no arguments.
    status: Literal["ok"] = "ok"
# Response body of the readiness endpoint.
# Documented with comments rather than a class docstring so the generated
# schema for this model is left exactly as it was.
class ReadinessResponse(BaseModel):
    # Overall verdict: "ready", or "degraded" when a required dependency is down.
    status: Literal["ready", "degraded"]
    # Per-dependency outcome, keyed by dependency name.
    checks: dict[str, CheckStatus]
# Liveness probe: touches no dependency, so it answers whenever the process
# is able to serve requests at all.
# (Comments, not a docstring: FastAPI would publish a docstring as the
# operation description in the OpenAPI schema.)
@router.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    liveness = HealthResponse()  # defaults to status="ok"
    return liveness
async def _check_db() -> CheckStatus:
    """Probe the database with ``SELECT 1``, bounded by CHECK_TIMEOUT_SECONDS.

    Returns:
        "ok" if a session could be opened and the query completed in time,
        "down" if anything raised or the time limit was exceeded.

    Never raises for ordinary failures: every ``Exception`` is logged as a
    warning and converted to "down", so one broken dependency cannot break
    the probe itself.
    """
    try:
        # The timeout wraps session acquisition as well as the query, so a
        # stalled connection attempt is bounded too.
        async with asyncio.timeout(CHECK_TIMEOUT_SECONDS):
            async with get_sessionmaker()() as session:
                await session.execute(text("SELECT 1"))
        return "ok"
    except Exception as exc:
        # The TimeoutError raised by asyncio.timeout() carries no message, so
        # str(exc) is "" for exactly the hang case this probe guards against.
        # Log the exception type as well so the entry stays diagnosable.
        log.warning(
            "healthcheck_db_down",
            error=str(exc),
            error_type=type(exc).__name__,
        )
        return "down"
async def _check_redis() -> CheckStatus:
    """Probe Redis with ``PING``, bounded by CHECK_TIMEOUT_SECONDS.

    Returns:
        "ok" if the ping completed in time, "down" if anything raised or the
        time limit was exceeded.

    Never raises for ordinary failures: every ``Exception`` is logged as a
    warning and converted to "down", so one broken dependency cannot break
    the probe itself.
    """
    try:
        async with asyncio.timeout(CHECK_TIMEOUT_SECONDS):
            await get_redis().ping()
        return "ok"
    except Exception as exc:
        # The TimeoutError raised by asyncio.timeout() carries no message, so
        # str(exc) is "" for exactly the hang case this probe guards against.
        # Log the exception type as well so the entry stays diagnosable.
        log.warning(
            "healthcheck_redis_down",
            error=str(exc),
            error_type=type(exc).__name__,
        )
        return "down"
async def _check_ml() -> CheckStatus:
    """Report the optional ML dependency without contacting it.

    Returns "skipped" when no ML service URL is configured and "ok" otherwise.
    No request is made: a real client lands in step 12, and until then a
    configured URL is simply taken as "ok".
    """
    ml_url = get_settings().ml_service_url
    if ml_url is None:
        # Optional dependency — its absence is fine.
        return "skipped"
    return "ok"
# Readiness probe: runs the DB, Redis and ML checks concurrently and reports
# each result. Only DB and Redis are required — if either is "down" the
# response is 503 with status "degraded"; otherwise the default status code
# is kept and status is "ready". The ML result is reported but never affects
# the verdict.
# (Comments, not a docstring: FastAPI would publish a docstring as the
# operation description in the OpenAPI schema.)
@router.get("/health/ready", response_model=ReadinessResponse)
async def readiness(response: Response) -> ReadinessResponse:
    db_state, redis_state, ml_state = await asyncio.gather(
        _check_db(),
        _check_redis(),
        _check_ml(),
    )
    results: dict[str, CheckStatus] = {
        "database": db_state,
        "redis": redis_state,
        "ml": ml_state,
    }

    # Healthy path first: neither required dependency reported "down".
    if "down" not in (db_state, redis_state):
        return ReadinessResponse(status="ready", checks=results)

    response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
    return ReadinessResponse(status="degraded", checks=results)