Project started 🍾
This commit is contained in:
@@ -0,0 +1,83 @@
|
||||
"""Health & readiness endpoints — used by compose healthchecks and the admin UI.
|
||||
|
||||
* ``/health`` — liveness: the process is up. Always 200 if serving.
|
||||
* ``/health/ready`` — readiness: checks DB, Redis, and (optionally) ML.
|
||||
Returns 503 if a *required* dependency is down. ML is optional — its absence
|
||||
degrades, never fails, readiness (graceful degradation, see plan §6.5).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter, Response, status
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import get_logger
|
||||
from app.infrastructure.cache import get_redis
|
||||
from app.infrastructure.db import get_sessionmaker
|
||||
|
||||
# Module logger, obtained from the project's logging helper with this module's name.
log = get_logger(__name__)

# Router that carries both endpoints below; routes are tagged "health".
router = APIRouter(tags=["health"])

# Outcome of a single dependency check as reported in the readiness payload.
CheckStatus = Literal["ok", "down", "skipped"]

# Upper bound, in seconds, applied to each dependency check. A readiness probe
# has to answer quickly and must never hang, so a check that runs longer than
# this is reported as "down".
CHECK_TIMEOUT_SECONDS: float = 2.0
|
||||
|
||||
|
||||
# Response body of the ``/health`` liveness endpoint.
# (Documented with comments on purpose: a class docstring on a Pydantic model
# would become the schema description and alter the generated API docs.)
class HealthResponse(BaseModel):
    # The single value this model admits; it is also the default.
    status: Literal["ok"] = "ok"
|
||||
|
||||
|
||||
# Response body of the ``/health/ready`` readiness endpoint.
# (Documented with comments on purpose: a class docstring on a Pydantic model
# would become the schema description and alter the generated API docs.)
class ReadinessResponse(BaseModel):
    # Overall verdict of the probe.
    status: Literal["ready", "degraded"]

    # Per-dependency outcome, keyed by dependency name.
    checks: dict[str, CheckStatus]
|
||||
|
||||
|
||||
@router.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    # Liveness probe: touches no dependency, so it answers whenever the
    # process is able to serve requests. Kept as a comment rather than a
    # docstring because FastAPI publishes handler docstrings in the OpenAPI
    # description.
    liveness = HealthResponse()
    return liveness
|
||||
|
||||
|
||||
async def _check_db() -> CheckStatus:
    """Probe the database with a trivial query, bounded by the check timeout.

    Opens a session from the configured sessionmaker and executes ``SELECT 1``
    inside an ``asyncio.timeout`` of ``CHECK_TIMEOUT_SECONDS``.

    Returns:
        ``"ok"`` when the query completes in time; ``"down"`` when any
        exception is raised, including the timeout expiring. The failure is
        logged as a warning and never propagated to the caller.
    """
    try:
        async with asyncio.timeout(CHECK_TIMEOUT_SECONDS):
            async with get_sessionmaker()() as session:
                await session.execute(text("SELECT 1"))
    except Exception as exc:
        # Probe boundary: a failing dependency is reported, not raised.
        # ``asyncio.timeout`` raises a bare ``TimeoutError`` whose ``str()`` is
        # empty, so fall back to the exception type name; otherwise the most
        # likely failure mode would be logged with a blank ``error`` field.
        log.warning("healthcheck_db_down", error=str(exc) or type(exc).__name__)
        return "down"
    return "ok"
|
||||
|
||||
|
||||
async def _check_redis() -> CheckStatus:
    """Probe Redis with a ``PING``, bounded by the check timeout.

    Awaits ``ping()`` on the client returned by ``get_redis()`` inside an
    ``asyncio.timeout`` of ``CHECK_TIMEOUT_SECONDS``.

    Returns:
        ``"ok"`` when the ping completes in time; ``"down"`` when any
        exception is raised, including the timeout expiring. The failure is
        logged as a warning and never propagated to the caller.
    """
    try:
        async with asyncio.timeout(CHECK_TIMEOUT_SECONDS):
            await get_redis().ping()
    except Exception as exc:
        # Probe boundary: a failing dependency is reported, not raised.
        # ``asyncio.timeout`` raises a bare ``TimeoutError`` whose ``str()`` is
        # empty, so fall back to the exception type name; otherwise the most
        # likely failure mode would be logged with a blank ``error`` field.
        log.warning("healthcheck_redis_down", error=str(exc) or type(exc).__name__)
        return "down"
    return "ok"
|
||||
|
||||
|
||||
async def _check_ml() -> CheckStatus:
    """Report the status of the optional ML dependency.

    No request is made to the ML service here: the result depends only on
    whether ``ml_service_url`` is configured. A real client is planned for
    step 12; until then a missing URL is acceptable.

    Returns:
        ``"skipped"`` when ``ml_service_url`` is ``None``, otherwise ``"ok"``.
    """
    ml_url = get_settings().ml_service_url
    if ml_url is None:
        return "skipped"
    return "ok"
|
||||
|
||||
|
||||
@router.get("/health/ready", response_model=ReadinessResponse)
async def readiness(response: Response) -> ReadinessResponse:
    # Readiness probe. Runs the database, Redis and ML checks concurrently and
    # reports each outcome under "database", "redis" and "ml".
    #
    # Only the database and Redis are required: if either is "down" the reply
    # is HTTP 503 with status "degraded"; otherwise the status is "ready" and
    # the default status code is left untouched. The ML result is reported but
    # never affects the verdict.
    #
    # Kept as comments rather than a docstring because FastAPI publishes
    # handler docstrings in the OpenAPI description.
    outcomes = await asyncio.gather(_check_db(), _check_redis(), _check_ml())
    checks: dict[str, CheckStatus] = dict(zip(("database", "redis", "ml"), outcomes))

    if "down" not in (checks["database"], checks["redis"]):
        return ReadinessResponse(status="ready", checks=checks)

    response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
    return ReadinessResponse(status="degraded", checks=checks)
|
||||
Reference in New Issue
Block a user