"""Health & readiness endpoints — used by compose healthchecks and the admin UI.

* ``/health`` — liveness: the process is up. Always 200 if serving.
* ``/health/ready`` — readiness: checks DB, Redis, and (optionally) ML. Returns
  503 if a *required* dependency is down. ML is optional — its absence
  degrades, never fails, readiness (graceful degradation, see plan §6.5).
"""

import asyncio
from collections.abc import Awaitable, Callable
from typing import Literal

from fastapi import APIRouter, Response, status
from pydantic import BaseModel
from sqlalchemy import text

from app.core.config import get_settings
from app.core.logging import get_logger
from app.infrastructure.cache import get_redis
from app.infrastructure.db import get_sessionmaker

log = get_logger(__name__)

router = APIRouter(tags=["health"])

CheckStatus = Literal["ok", "down", "skipped"]

# A readiness probe must answer fast and never hang — bound every dependency
# check. A check that exceeds this is reported "down".
CHECK_TIMEOUT_SECONDS = 2.0


# Liveness payload: the only value ``status`` can take is "ok".
# (Documented with comments rather than docstrings so the generated OpenAPI
# schema descriptions stay exactly as they were.)
class HealthResponse(BaseModel):
    status: Literal["ok"] = "ok"


# Readiness payload: an overall ``status`` plus one ``CheckStatus`` per
# dependency, keyed by dependency name.
class ReadinessResponse(BaseModel):
    status: Literal["ready", "degraded"]
    checks: dict[str, CheckStatus]


# Liveness: touches no dependency, so it answers whenever the app is serving.
@router.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    return HealthResponse()


async def _run_check(
    probe: Callable[[], Awaitable[object]], *, down_event: str
) -> CheckStatus:
    """Await ``probe()`` bounded by ``CHECK_TIMEOUT_SECONDS``.

    Args:
        probe: Zero-argument callable returning the awaitable to run. It is
            called inside the ``try`` so that failures while *building* the
            probe (e.g. obtaining a client) are reported as "down" as well.
        down_event: Log event name emitted at warning level on failure.

    Returns:
        ``"ok"`` if the probe completes in time, ``"down"`` if it raises or
        exceeds the timeout.
    """
    try:
        async with asyncio.timeout(CHECK_TIMEOUT_SECONDS):
            await probe()
    except Exception as exc:
        # ``asyncio.timeout`` raises a message-less ``TimeoutError``, so
        # ``str(exc)`` alone logs an empty string for a hung dependency.
        # Logging the exception type keeps that case identifiable.
        log.warning(down_event, error=str(exc), error_type=type(exc).__name__)
        return "down"
    return "ok"


async def _probe_db() -> None:
    """Open a session from the sessionmaker and execute ``SELECT 1``."""
    async with get_sessionmaker()() as session:
        await session.execute(text("SELECT 1"))


async def _probe_redis() -> None:
    """Send ``PING`` through the client returned by ``get_redis()``."""
    await get_redis().ping()


async def _check_db() -> CheckStatus:
    """Report the database as "ok" or "down" (required dependency)."""
    return await _run_check(_probe_db, down_event="healthcheck_db_down")


async def _check_redis() -> CheckStatus:
    """Report Redis as "ok" or "down" (required dependency)."""
    return await _run_check(_probe_redis, down_event="healthcheck_redis_down")


async def _check_ml() -> CheckStatus:
    """Report the ML dependency as "skipped" or "ok".

    "skipped" when ``ml_service_url`` is ``None`` in settings; otherwise "ok".
    The service itself is not contacted here.
    """
    # Optional dependency. A real client lands in step 12; absence is fine.
    return "skipped" if get_settings().ml_service_url is None else "ok"


# Readiness: runs all dependency checks concurrently. Only the database and
# Redis are required: if either is "down" the response is HTTP 503 with
# status "degraded"; otherwise HTTP 200 with status "ready". The ML result is
# reported in ``checks`` but never affects the status or the HTTP code.
@router.get("/health/ready", response_model=ReadinessResponse)
async def readiness(response: Response) -> ReadinessResponse:
    db, redis, ml = await asyncio.gather(_check_db(), _check_redis(), _check_ml())
    checks: dict[str, CheckStatus] = {"database": db, "redis": redis, "ml": ml}

    required_down = db == "down" or redis == "down"
    if required_down:
        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE

    return ReadinessResponse(
        status="degraded" if required_down else "ready",
        checks=checks,
    )