feat(storage): library + disk statistics endpoint (§A6)
Docker Build & Publish / build (push) Has been cancelled
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled

Implement `GET /api/v1/storage`, replacing the stub. Returns aggregate
library facts (track/artist/album counts, total footprint, playtime,
per-format / per-source / metadata-status breakdowns, top genres) plus
the real capacity of the backing volume.

- domain: `LibraryStats`, `FormatBreakdown`, `DiskUsage` value objects
- ports: `FileStorage.disk_usage()` (local = shutil.disk_usage walking up
  to the nearest existing ancestor; S3 returns None — no fixed disk)
- repo: `TrackRepository.library_stats()` (single set of GROUP BYs)
- tests: storage stats API (auth, empty library, upload counting)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 01:19:53 +03:00
parent 636820afb8
commit fa23568214
9 changed files with 371 additions and 5 deletions
+45
View File
@@ -0,0 +1,45 @@
"""Storage / library statistics response schemas (§A6)."""
import datetime as dt
from pydantic import BaseModel
class DiskUsageOut(BaseModel):
    """Capacity figures for the volume that backs the media store."""

    # Copied field-for-field from the domain ``DiskUsage`` value object.
    total: int
    used: int
    free: int
class FormatBreakdownOut(BaseModel):
    """One row of the per-format library breakdown."""

    file_format: str  # format label as recorded in the catalogue
    track_count: int  # number of tracks stored in this format
    total_size: int  # summed recorded file size of those tracks
class GenreCountOut(BaseModel):
    """A genre name paired with the number of tracks counted for it."""

    genre: str
    track_count: int
class StorageStatsOut(BaseModel):
    """Single-call payload backing the Storage screen."""

    # --- library catalogue totals ---
    total_tracks: int
    total_artists: int
    total_albums: int
    total_size: int  # sum of the recorded file size of every track
    total_duration_seconds: int
    largest_track_size: int
    earliest_added: dt.datetime | None  # no value while the library is empty
    latest_added: dt.datetime | None  # no value while the library is empty

    # --- breakdowns ---
    by_format: list[FormatBreakdownOut]
    by_metadata_status: dict[str, int]  # metadata status -> track count
    by_source: dict[str, int]  # source -> track count
    top_genres: list[GenreCountOut]

    # --- backing volume (``None`` for object-store backends) ---
    disk: DiskUsageOut | None
+60 -1
View File
@@ -4,11 +4,70 @@ from typing import Any
from fastapi import APIRouter
from app.api.deps import (
AlbumRepoDep,
ArtistRepoDep,
CurrentUser,
FileStorageDep,
TrackRepoDep,
)
from app.api.schemas.storage import (
DiskUsageOut,
FormatBreakdownOut,
GenreCountOut,
StorageStatsOut,
)
router = APIRouter(prefix="/storage", tags=["storage"])
# How many of the most common genres the dashboard surfaces.
_TOP_GENRES = 8
@router.get("")
async def get_storage_stats(
    track_repo: TrackRepoDep,
    artist_repo: ArtistRepoDep,
    album_repo: AlbumRepoDep,
    storage: FileStorageDep,
    _: CurrentUser,
) -> StorageStatsOut:
    """Return library and disk statistics for the Storage dashboard (§A6).

    Aggregates come from the catalogue (cheap GROUP BYs); ``disk`` reflects the
    real backing volume and is ``None`` for backends without a fixed-capacity
    disk (e.g. object stores).

    The ``CurrentUser`` dependency is declared only so that the request must
    be authenticated; its value is not used.
    """
    stats = await track_repo.library_stats()
    total_artists = await artist_repo.count(q=None)
    total_albums = await album_repo.count(artist_id=None, q=None)
    genres = await track_repo.genres()
    disk = await storage.disk_usage()

    by_format = [
        FormatBreakdownOut(
            file_format=entry.file_format,
            track_count=entry.track_count,
            total_size=entry.total_size,
        )
        for entry in stats.by_format
    ]
    # NOTE(review): slicing assumes ``genres()`` already returns the most
    # common genres first — its ordering is not visible from here; confirm.
    top_genres = [
        GenreCountOut(genre=genre, track_count=count)
        for genre, count in genres[:_TOP_GENRES]
    ]
    # Explicit ``None`` check: the absence of a disk is the only "no data"
    # case, it must not depend on the truthiness of the value object.
    disk_out = (
        DiskUsageOut(total=disk.total, used=disk.used, free=disk.free)
        if disk is not None
        else None
    )

    return StorageStatsOut(
        total_tracks=stats.total_tracks,
        total_artists=total_artists,
        total_albums=total_albums,
        total_size=stats.total_size,
        total_duration_seconds=stats.total_duration_seconds,
        largest_track_size=stats.largest_track_size,
        earliest_added=stats.earliest_added,
        latest_added=stats.latest_added,
        by_format=by_format,
        by_metadata_status=stats.by_metadata_status,
        by_source=stats.by_source,
        top_genres=top_genres,
        disk=disk_out,
    )
@router.get("/duplicates")
+9 -1
View File
@@ -6,7 +6,12 @@ from app.domain.entities.history import PlayHistoryEntry
from app.domain.entities.like import Like
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
from app.domain.entities.playlist import Playlist
from app.domain.entities.storage import ObjectStat
from app.domain.entities.storage import (
DiskUsage,
FormatBreakdown,
LibraryStats,
ObjectStat,
)
from app.domain.entities.track import Artist, Track
from app.domain.entities.user import Credentials, SubsonicCredentials, User
@@ -16,7 +21,10 @@ __all__ = [
"AudioTags",
"CoverArt",
"Credentials",
"DiskUsage",
"Fingerprint",
"FormatBreakdown",
"LibraryStats",
"Like",
"ObjectStat",
"PlayHistoryEntry",
+37
View File
@@ -1,5 +1,6 @@
"""Value objects for file storage."""
import datetime as dt
from dataclasses import dataclass
@@ -7,3 +8,39 @@ from dataclasses import dataclass
class ObjectStat:
size: int
content_type: str | None
@dataclass(frozen=True, slots=True)
class DiskUsage:
"""Capacity of the volume backing the media store. ``None`` for backends
(e.g. object stores) that expose no notion of total disk capacity."""
total: int
used: int
free: int
@dataclass(frozen=True, slots=True)
class FormatBreakdown:
"""Per-container-format slice of the library (e.g. ``flac`` → 312 tracks)."""
file_format: str
track_count: int
total_size: int
@dataclass(frozen=True, slots=True)
class LibraryStats:
    """Aggregate facts about everything the instance has stored.

    All figures are computed from the catalogue (DB), not the filesystem:
    ``total_size`` is the sum of the recorded ``file_size`` of every track.
    """

    # overall totals
    total_tracks: int
    total_size: int
    total_duration_seconds: int

    # breakdowns
    by_format: list[FormatBreakdown]
    by_metadata_status: dict[str, int]  # metadata status -> track count
    by_source: dict[str, int]  # source -> track count

    # extremes
    largest_track_size: int
    earliest_added: dt.datetime | None  # ``None`` while the library is empty
    latest_added: dt.datetime | None  # ``None`` while the library is empty
+10 -2
View File
@@ -17,7 +17,9 @@ from app.domain.entities import (
AudioTags,
CoverArt,
Credentials,
DiskUsage,
Fingerprint,
LibraryStats,
Like,
ObjectStat,
PlayHistoryEntry,
@@ -98,6 +100,10 @@ class FileStorage(Protocol):
# NOTE(review): presumably reports whether an object is stored under ``key`` —
# no implementation is visible in this view; confirm.
async def exists(self, key: str) -> bool: ...
# Remove the object stored under ``key``. The local backend unlinks the file
# and treats an already-missing file as a no-op.
async def delete(self, key: str) -> None: ...
# Synchronous factory: returns an async context manager that yields a ``Path``
# for ``key``.
def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]: ...
async def disk_usage(self) -> DiskUsage | None:
    """Report the capacity of the volume behind this store.

    Returns ``None`` when the backend has no addressable disk (e.g. an
    object store).
    """
    ...
class ArtistRepository(Protocol):
@@ -128,9 +134,11 @@ class TrackRepository(Protocol):
added_by: uuid.UUID | None,
) -> Track: ...
# NOTE(review): presumably removes the track with this id — the implementation
# is not visible here; confirm how an unknown id is handled.
async def delete(self, track_id: uuid.UUID) -> None: ...
# genres / library_stats must come before ``list`` — the method named
# ``list`` shadows the builtin in later annotations (same pattern as
# AlbumRepository below).
# Returns ``(genre, track_count)`` pairs, one per genre.
async def genres(self) -> list[tuple[str, int]]: ...
# Whole-catalogue aggregates (totals plus per-format / status / source
# breakdowns) packaged as a ``LibraryStats`` value object.
async def library_stats(self) -> LibraryStats: ...
async def list(
self,
*,
@@ -6,6 +6,7 @@ import uuid
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.entities.storage import FormatBreakdown, LibraryStats
from app.domain.entities.track import Track
from app.domain.errors import NotFoundError
from app.infrastructure.db.models.artist import ArtistModel
@@ -106,6 +107,63 @@ class SqlAlchemyTrackRepository:
).all()
return [(row.genre, row.cnt) for row in rows]
async def library_stats(self) -> LibraryStats:
    """Compute one-shot aggregates over the whole catalogue (no pagination).

    Defined before ``list`` for the same shadowing reason as ``genres``.

    Runs four queries: overall totals, then per-format, per-metadata-status
    and per-source GROUP BYs. The per-format rows are ordered by total size,
    largest first.

    Returns:
        A ``LibraryStats`` whose numeric aggregates are plain ``int`` values.
        ``earliest_added`` / ``latest_added`` are ``None`` for an empty
        catalogue.
    """
    # Reused in SELECT and ORDER BY so both see the NULL-safe value; ordering
    # by the raw SUM would sort an all-NULL group first under DESC on
    # PostgreSQL.
    size_sum = func.coalesce(func.sum(TrackModel.file_size), 0)

    totals = (
        await self._session.execute(
            select(
                func.count(TrackModel.id),
                size_sum,
                func.coalesce(func.sum(TrackModel.duration_seconds), 0),
                func.coalesce(func.max(TrackModel.file_size), 0),
                func.min(TrackModel.created_at),
                func.max(TrackModel.created_at),
            )
        )
    ).one()
    track_total, size_total, duration_total, largest, earliest, latest = totals

    fmt_rows = (
        await self._session.execute(
            select(TrackModel.file_format, func.count(TrackModel.id), size_sum)
            .group_by(TrackModel.file_format)
            .order_by(size_sum.desc())
        )
    ).all()

    status_rows = (
        await self._session.execute(
            select(TrackModel.metadata_status, func.count(TrackModel.id)).group_by(
                TrackModel.metadata_status
            )
        )
    ).all()

    source_rows = (
        await self._session.execute(
            select(TrackModel.source, func.count(TrackModel.id)).group_by(TrackModel.source)
        )
    ).all()

    # Depending on the column type the driver may hand back ``Decimal`` (SUM
    # over BIGINT is NUMERIC on PostgreSQL) or ``float``; ``LibraryStats``
    # declares these fields as ``int``, so coerce at the boundary.
    return LibraryStats(
        total_tracks=int(track_total),
        total_size=int(size_total),
        total_duration_seconds=int(duration_total),
        largest_track_size=int(largest),
        earliest_added=earliest,
        latest_added=latest,
        by_format=[
            FormatBreakdown(file_format=fmt, track_count=int(cnt), total_size=int(size))
            for fmt, cnt, size in fmt_rows
        ],
        by_metadata_status={status: int(cnt) for status, cnt in status_rows},
        by_source={source: int(cnt) for source, cnt in source_rows},
    )
async def list(
self,
*,
+10 -1
View File
@@ -8,7 +8,7 @@ from pathlib import Path
import anyio
from app.domain.entities.storage import ObjectStat
from app.domain.entities.storage import DiskUsage, ObjectStat
from app.domain.errors import StorageError
_EXT_CONTENT_TYPE: dict[str, str] = {
@@ -78,6 +78,15 @@ class LocalFileStorage:
async def delete(self, key: str) -> None:
    """Remove the file stored under ``key``; an already-missing file is not an error."""
    target = self._media_path / key
    target.unlink(missing_ok=True)
async def disk_usage(self) -> DiskUsage | None:
    """Report the capacity of the volume that holds the media root.

    The media root may not exist yet on a fresh instance, so the probe walks
    up to the nearest existing ancestor and reports that volume instead.

    The whole probe — the ``exists()`` checks as well as the
    ``shutil.disk_usage`` call — runs in a worker thread so that no blocking
    filesystem call is made on the event loop.

    Returns:
        A ``DiskUsage`` built from the ``total`` / ``used`` / ``free`` values
        reported by ``shutil.disk_usage``.
    """

    def _probe() -> DiskUsage:
        path = self._media_path
        # ``path == path.parent`` only at the filesystem root (or ``.``),
        # which guarantees the walk terminates.
        while not path.exists() and path != path.parent:
            path = path.parent
        usage = shutil.disk_usage(str(path))
        return DiskUsage(total=usage.total, used=usage.used, free=usage.free)

    return await anyio.to_thread.run_sync(_probe)
def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]:
    """Return the async context manager that ``_as_local_path_cm`` builds for ``key``."""
    local_path_cm = self._as_local_path_cm(key)
    return local_path_cm
+4
View File
@@ -127,6 +127,10 @@ class S3FileStorage:
except ClientError as exc:
raise StorageError(str(exc)) from exc
async def disk_usage(self) -> None:
    """Report no disk usage: an object store has no fixed-capacity volume."""
    return None
def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]:
    """Return the async context manager that ``_as_local_path_cm`` builds for ``key``."""
    local_path_cm = self._as_local_path_cm(key)
    return local_path_cm
+138
View File
@@ -0,0 +1,138 @@
"""Integration tests for the storage statistics endpoint (§A6).
Requires a reachable Postgres; skips otherwise.
"""
import asyncio
import os
from collections.abc import AsyncIterator
from pathlib import Path
import pytest
from app.core.config import get_settings
from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope
from app.infrastructure.db.repositories import (
SqlAlchemyRefreshTokenRepository,
SqlAlchemyUserRepository,
)
from asgi_lifespan import LifespanManager
from httpx import ASGITransport, AsyncClient
pytestmark = pytest.mark.asyncio
# Memoised probe result: ``None`` until the first check has run.
_db_reachable_cache: bool | None = None


async def _db_reachable() -> bool:
    """Tell whether the database answers ``SELECT 1`` within three seconds.

    The probe runs at most once per process; later calls return the cached
    outcome. Any exception raised while connecting or querying (including
    the timeout) is deliberately treated as "not reachable".
    """
    global _db_reachable_cache
    if _db_reachable_cache is None:
        from sqlalchemy import text

        try:
            async with asyncio.timeout(3), get_engine().connect() as conn:
                await conn.execute(text("SELECT 1"))
        except Exception:
            _db_reachable_cache = False
        else:
            _db_reachable_cache = True
    return _db_reachable_cache
@pytest.fixture
async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a freshly built app and a clean schema.

    Setup: skips when Postgres is unreachable, points ``MEDIA_PATH`` at the
    per-test temp directory, resets the cached settings and the storage
    provider singleton, recreates every table and registers ``testuser``.

    Teardown: drops the tables, disposes the engine, resets the storage
    provider and restores ``MEDIA_PATH`` to the value it had before the test
    (removing it only if it was previously unset).
    """
    if not await _db_reachable():
        pytest.skip("Postgres not reachable — integration test skipped.")

    # Remember any pre-existing value so teardown restores it rather than
    # unconditionally deleting the variable.
    previous_media_path = os.environ.get("MEDIA_PATH")
    os.environ["MEDIA_PATH"] = str(tmp_path)
    get_settings.cache_clear()

    import app.infrastructure.storage.provider as _storage_provider

    # Drop the cached storage instance so the next lookup builds a new one.
    _storage_provider._storage = None

    try:
        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
            await conn.run_sync(Base.metadata.create_all)

        from app.application.user_service import UserService
        from app.core.security import Argon2PasswordHasher

        async with session_scope() as session:
            await UserService(
                users=SqlAlchemyUserRepository(session),
                refresh_tokens=SqlAlchemyRefreshTokenRepository(session),
                hasher=Argon2PasswordHasher(),
            ).create_user(username="testuser", password="testpass1", is_superuser=False)

        from app.main import create_app

        app = create_app()
        async with LifespanManager(app):
            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                yield client

        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
        await dispose_engine()
    finally:
        _storage_provider._storage = None
        if previous_media_path is None:
            os.environ.pop("MEDIA_PATH", None)
        else:
            os.environ["MEDIA_PATH"] = previous_media_path
        get_settings.cache_clear()
async def _login(api: AsyncClient) -> str:
    """Log in as the fixture user and return the issued access token."""
    credentials = {"username": "testuser", "password": "testpass1"}
    response = await api.post("/api/v1/auth/login", json=credentials)
    assert response.status_code == 200
    token = response.json()["access_token"]
    return str(token)
async def _upload(api: AsyncClient, token: str, *, name: str) -> None:
    """Upload a small fake audio file called ``name`` and assert a 200 response."""
    # Vary the bytes per file so dedup (by content) keeps them distinct.
    payload = f"fake audio bytes for storage stats test {name}".encode() * 10
    auth_header = {"Authorization": f"Bearer {token}"}
    response = await api.post(
        "/api/v1/upload",
        files={"file": (name, payload, "audio/mpeg")},
        headers=auth_header,
    )
    assert response.status_code == 200, response.text
async def test_storage_stats_requires_auth(api: AsyncClient) -> None:
    """A request without credentials is rejected with 401."""
    response = await api.get("/api/v1/storage")
    assert response.status_code == 401
async def test_storage_stats_empty_library(api: AsyncClient) -> None:
    """With no tracks uploaded the totals are zero but the disk is still reported."""
    token = await _login(api)
    auth_header = {"Authorization": f"Bearer {token}"}

    response = await api.get("/api/v1/storage", headers=auth_header)
    assert response.status_code == 200, response.text

    payload = response.json()
    assert payload["total_tracks"] == 0
    assert payload["total_size"] == 0
    assert payload["by_format"] == []

    # Local backend reports a real disk in the test environment.
    disk = payload["disk"]
    assert disk is not None
    assert disk["total"] > 0
async def test_storage_stats_counts_uploads(api: AsyncClient) -> None:
    """Two distinct uploads show up in the totals and in every breakdown."""
    token = await _login(api)
    for filename in ("one.mp3", "two.mp3"):
        await _upload(api, token, name=filename)

    auth_header = {"Authorization": f"Bearer {token}"}
    response = await api.get("/api/v1/storage", headers=auth_header)
    assert response.status_code == 200, response.text

    payload = response.json()
    assert payload["total_tracks"] == 2
    assert payload["total_size"] > 0
    assert payload["total_artists"] >= 1

    formats = {row["file_format"]: row for row in payload["by_format"]}
    assert "mp3" in formats
    assert formats["mp3"]["track_count"] == 2
    assert sum(payload["by_source"].values()) == 2