84 lines
2.6 KiB
Python
84 lines
2.6 KiB
Python
"""Health & readiness endpoints — used by compose healthchecks and the admin UI.
|
|
|
|
* ``/health`` — liveness: the process is up. Always 200 if serving.
|
|
* ``/health/ready`` — readiness: checks DB, Redis, and (optionally) ML.
|
|
Returns 503 if a *required* dependency is down. ML is optional — its absence
|
|
degrades, never fails, readiness (graceful degradation, see plan §6.5).
|
|
"""
|
|
|
|
import asyncio
|
|
from typing import Literal
|
|
|
|
from fastapi import APIRouter, Response, status
|
|
from pydantic import BaseModel
|
|
from sqlalchemy import text
|
|
|
|
from app.core.config import get_settings
|
|
from app.core.logging import get_logger
|
|
from app.infrastructure.cache import get_redis
|
|
from app.infrastructure.db import get_sessionmaker
|
|
|
|
# Module-level logger and the router on which the health endpoints register.
log = get_logger(__name__)
router = APIRouter(tags=["health"])

# Outcome of a single dependency probe.
CheckStatus = Literal["ok", "down", "skipped"]

# Upper bound, in seconds, applied to each dependency probe. Readiness has to
# respond quickly and must never hang, so a probe that overruns this limit is
# reported as "down".
CHECK_TIMEOUT_SECONDS = 2.0
|
|
|
|
|
|
class HealthResponse(BaseModel):
    """Response body of the liveness endpoint."""

    # The only permitted value; it is also the default.
    status: Literal["ok"] = "ok"
|
|
|
|
|
|
class ReadinessResponse(BaseModel):
    """Response body of the readiness endpoint."""

    # Overall verdict for the service.
    status: Literal["ready", "degraded"]
    # Result of each individual probe, keyed by dependency name.
    checks: dict[str, CheckStatus]
|
|
|
|
|
|
@router.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    """Liveness probe: touch no dependency and return the default payload."""
    liveness = HealthResponse()
    return liveness
|
|
|
|
|
|
async def _check_db() -> CheckStatus:
    """Probe the database by running ``SELECT 1`` on a fresh session.

    Opening the session and running the query are both bounded by
    ``CHECK_TIMEOUT_SECONDS``.

    Returns:
        ``"ok"`` when the query completes in time; ``"down"`` when the probe
        times out or raises any ``Exception`` (the failure is logged as a
        warning rather than propagated).
    """
    try:
        async with asyncio.timeout(CHECK_TIMEOUT_SECONDS):
            async with get_sessionmaker()() as session:
                await session.execute(text("SELECT 1"))
    except Exception as exc:
        # Probe boundary: every failure maps to "down". ``asyncio.timeout``
        # raises a bare ``TimeoutError`` whose ``str()`` is empty, so fall back
        # to ``repr`` to keep the log entry informative.
        log.warning("healthcheck_db_down", error=str(exc) or repr(exc))
        return "down"
    else:
        return "ok"
|
|
|
|
|
|
async def _check_redis() -> CheckStatus:
    """Probe Redis by awaiting ``ping()`` on the client from ``get_redis()``.

    The ping is bounded by ``CHECK_TIMEOUT_SECONDS``.

    Returns:
        ``"ok"`` when the ping completes in time; ``"down"`` when the probe
        times out or raises any ``Exception`` (the failure is logged as a
        warning rather than propagated).
    """
    try:
        async with asyncio.timeout(CHECK_TIMEOUT_SECONDS):
            await get_redis().ping()
    except Exception as exc:
        # Probe boundary: every failure maps to "down". ``asyncio.timeout``
        # raises a bare ``TimeoutError`` whose ``str()`` is empty, so fall back
        # to ``repr`` to keep the log entry informative.
        log.warning("healthcheck_redis_down", error=str(exc) or repr(exc))
        return "down"
    else:
        return "ok"
|
|
|
|
|
|
async def _check_ml() -> CheckStatus:
    """Report the ML dependency from configuration alone.

    Returns ``"skipped"`` when ``ml_service_url`` is ``None`` and ``"ok"``
    otherwise; no request is made to the service.
    """
    # Optional dependency. A real client lands in step 12; absence is fine.
    ml_url = get_settings().ml_service_url
    if ml_url is None:
        return "skipped"
    return "ok"
|
|
|
|
|
|
@router.get("/health/ready", response_model=ReadinessResponse)
async def readiness(response: Response) -> ReadinessResponse:
    """Run all dependency probes concurrently and summarise the outcome.

    The database and Redis are required: if either probe reports ``"down"``
    the response status code is set to 503 and the overall status is
    ``"degraded"``. The ML result is included in ``checks`` but does not
    affect the verdict.
    """
    db_state, redis_state, ml_state = await asyncio.gather(
        _check_db(),
        _check_redis(),
        _check_ml(),
    )
    results: dict[str, CheckStatus] = {
        "database": db_state,
        "redis": redis_state,
        "ml": ml_state,
    }

    overall: Literal["ready", "degraded"]
    if "down" in (db_state, redis_state):
        # A required dependency is unavailable: signal it via the HTTP status.
        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
        overall = "degraded"
    else:
        overall = "ready"

    return ReadinessResponse(status=overall, checks=results)
|