feat(storage): library + disk statistics endpoint (§A6)
Docker Build & Publish / build (push) Has been cancelled
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled

Implement `GET /api/v1/storage`, replacing the stub. Returns aggregate
library facts (track/artist/album counts, total footprint, playtime,
per-format / per-source / metadata-status breakdowns, top genres) plus
the real capacity of the backing volume.

- domain: `LibraryStats`, `FormatBreakdown`, `DiskUsage` value objects
- ports: `FileStorage.disk_usage()` (local = shutil.disk_usage walking up
  to the nearest existing ancestor; S3 returns None — no fixed disk)
- repo: `TrackRepository.library_stats()` (single set of GROUP BYs)
- tests: storage stats API (auth, empty library, upload counting)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 01:19:53 +03:00
parent 636820afb8
commit fa23568214
9 changed files with 371 additions and 5 deletions
+45
View File
@@ -0,0 +1,45 @@
"""Storage / library statistics response schemas (§A6)."""
import datetime as dt
from pydantic import BaseModel
class DiskUsageOut(BaseModel):
    """Capacity figures for the volume backing the media store."""

    # Whole-volume size, the part in use, and the part still available.
    total: int
    used: int
    free: int
class FormatBreakdownOut(BaseModel):
    """One per-format slice of the library: track count and summed size."""

    file_format: str
    # Number of tracks stored in this format.
    track_count: int
    # Combined size of those tracks.
    total_size: int
class GenreCountOut(BaseModel):
    """A genre paired with the number of tracks tagged with it."""

    genre: str
    track_count: int
class StorageStatsOut(BaseModel):
    """Single-call payload with everything the Storage screen displays."""

    # --- library catalogue -------------------------------------------------
    total_tracks: int
    total_artists: int
    total_albums: int
    total_size: int
    total_duration_seconds: int
    largest_track_size: int
    # Both timestamps are nullable (e.g. nothing has been added yet).
    earliest_added: dt.datetime | None
    latest_added: dt.datetime | None

    # --- breakdowns --------------------------------------------------------
    by_format: list[FormatBreakdownOut]
    by_metadata_status: dict[str, int]
    by_source: dict[str, int]
    top_genres: list[GenreCountOut]

    # --- backing volume (``None`` for object-store backends) ----------------
    disk: DiskUsageOut | None
+60 -1
View File
@@ -4,11 +4,70 @@ from typing import Any
from fastapi import APIRouter from fastapi import APIRouter
from app.api.deps import (
AlbumRepoDep,
ArtistRepoDep,
CurrentUser,
FileStorageDep,
TrackRepoDep,
)
from app.api.schemas.storage import (
DiskUsageOut,
FormatBreakdownOut,
GenreCountOut,
StorageStatsOut,
)
router = APIRouter(prefix="/storage", tags=["storage"]) router = APIRouter(prefix="/storage", tags=["storage"])
# Cap on how many of the most common genres the dashboard surfaces.
_TOP_GENRES = 8


@router.get("")
async def get_storage_stats(
    track_repo: TrackRepoDep,
    artist_repo: ArtistRepoDep,
    album_repo: AlbumRepoDep,
    storage: FileStorageDep,
    _: CurrentUser,
) -> StorageStatsOut:
    """Return library + disk statistics for the Storage dashboard (§A6).

    The catalogue figures are read from the repositories; ``disk`` mirrors
    what the storage backend reports and stays ``None`` when the backend has
    no fixed-capacity disk (e.g. object stores).
    """
    library = await track_repo.library_stats()
    artist_total = await artist_repo.count(q=None)
    album_total = await album_repo.count(artist_id=None, q=None)
    genre_counts = await track_repo.genres()
    volume = await storage.disk_usage()

    # Translate the domain value objects into their response schemas.
    format_rows = [
        FormatBreakdownOut(
            file_format=entry.file_format,
            track_count=entry.track_count,
            total_size=entry.total_size,
        )
        for entry in library.by_format
    ]
    # Only the first ``_TOP_GENRES`` entries returned by the repository are kept.
    leading_genres = [
        GenreCountOut(genre=name, track_count=tracks)
        for name, tracks in genre_counts[:_TOP_GENRES]
    ]
    disk_out = None
    if volume:
        disk_out = DiskUsageOut(total=volume.total, used=volume.used, free=volume.free)

    return StorageStatsOut(
        total_tracks=library.total_tracks,
        total_artists=artist_total,
        total_albums=album_total,
        total_size=library.total_size,
        total_duration_seconds=library.total_duration_seconds,
        largest_track_size=library.largest_track_size,
        earliest_added=library.earliest_added,
        latest_added=library.latest_added,
        by_format=format_rows,
        by_metadata_status=library.by_metadata_status,
        by_source=library.by_source,
        top_genres=leading_genres,
        disk=disk_out,
    )
@router.get("/duplicates") @router.get("/duplicates")
+9 -1
View File
@@ -6,7 +6,12 @@ from app.domain.entities.history import PlayHistoryEntry
from app.domain.entities.like import Like from app.domain.entities.like import Like
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
from app.domain.entities.playlist import Playlist from app.domain.entities.playlist import Playlist
from app.domain.entities.storage import ObjectStat from app.domain.entities.storage import (
DiskUsage,
FormatBreakdown,
LibraryStats,
ObjectStat,
)
from app.domain.entities.track import Artist, Track from app.domain.entities.track import Artist, Track
from app.domain.entities.user import Credentials, SubsonicCredentials, User from app.domain.entities.user import Credentials, SubsonicCredentials, User
@@ -16,7 +21,10 @@ __all__ = [
"AudioTags", "AudioTags",
"CoverArt", "CoverArt",
"Credentials", "Credentials",
"DiskUsage",
"Fingerprint", "Fingerprint",
"FormatBreakdown",
"LibraryStats",
"Like", "Like",
"ObjectStat", "ObjectStat",
"PlayHistoryEntry", "PlayHistoryEntry",
+37
View File
@@ -1,5 +1,6 @@
"""Value objects for file storage.""" """Value objects for file storage."""
import datetime as dt
from dataclasses import dataclass from dataclasses import dataclass
@@ -7,3 +8,39 @@ from dataclasses import dataclass
class ObjectStat: class ObjectStat:
size: int size: int
content_type: str | None content_type: str | None
@dataclass(frozen=True, slots=True)
class DiskUsage:
"""Capacity of the volume backing the media store. ``None`` for backends
(e.g. object stores) that expose no notion of total disk capacity."""
total: int
used: int
free: int
@dataclass(frozen=True, slots=True)
class FormatBreakdown:
"""Per-container-format slice of the library (e.g. ``flac`` → 312 tracks)."""
file_format: str
track_count: int
total_size: int
@dataclass(frozen=True, slots=True)
class LibraryStats:
    """Aggregate facts about everything the instance has stored.

    The figures are computed from the catalogue (DB), not the filesystem:
    ``total_size`` is the sum of the recorded ``file_size`` of every track.
    """

    # Overall totals.
    total_tracks: int
    total_size: int
    total_duration_seconds: int

    # Breakdowns: per format, and track counts keyed by status / source.
    by_format: list[FormatBreakdown]
    by_metadata_status: dict[str, int]
    by_source: dict[str, int]

    # Extremes; the timestamps are nullable.
    largest_track_size: int
    earliest_added: dt.datetime | None
    latest_added: dt.datetime | None
+10 -2
View File
@@ -17,7 +17,9 @@ from app.domain.entities import (
AudioTags, AudioTags,
CoverArt, CoverArt,
Credentials, Credentials,
DiskUsage,
Fingerprint, Fingerprint,
LibraryStats,
Like, Like,
ObjectStat, ObjectStat,
PlayHistoryEntry, PlayHistoryEntry,
@@ -98,6 +100,10 @@ class FileStorage(Protocol):
async def exists(self, key: str) -> bool: ... async def exists(self, key: str) -> bool: ...
async def delete(self, key: str) -> None: ... async def delete(self, key: str) -> None: ...
def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]: ... def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]: ...
async def disk_usage(self) -> DiskUsage | None:
    """Return the capacity of the volume backing the store.

    Backends with no addressable disk (e.g. an object store) return ``None``.
    """
    ...
class ArtistRepository(Protocol): class ArtistRepository(Protocol):
@@ -128,9 +134,11 @@ class TrackRepository(Protocol):
added_by: uuid.UUID | None, added_by: uuid.UUID | None,
) -> Track: ... ) -> Track: ...
async def delete(self, track_id: uuid.UUID) -> None: ... async def delete(self, track_id: uuid.UUID) -> None: ...
# genres / library_stats must come before ``list`` — the method named
# ``list`` shadows the builtin in later annotations (same pattern as
# AlbumRepository below).
async def genres(self) -> list[tuple[str, int]]: ... async def genres(self) -> list[tuple[str, int]]: ...
async def library_stats(self) -> LibraryStats:
    """Return aggregate statistics for the catalogue as a ``LibraryStats``."""
    ...
async def list( async def list(
self, self,
*, *,
@@ -6,6 +6,7 @@ import uuid
from sqlalchemy import func, select from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.entities.storage import FormatBreakdown, LibraryStats
from app.domain.entities.track import Track from app.domain.entities.track import Track
from app.domain.errors import NotFoundError from app.domain.errors import NotFoundError
from app.infrastructure.db.models.artist import ArtistModel from app.infrastructure.db.models.artist import ArtistModel
@@ -106,6 +107,63 @@ class SqlAlchemyTrackRepository:
).all() ).all()
return [(row.genre, row.cnt) for row in rows] return [(row.genre, row.cnt) for row in rows]
async def library_stats(self) -> LibraryStats:
    """One-shot aggregate over the whole catalogue (no pagination).

    Issues four queries: overall totals, a per-format breakdown ordered by
    summed file size (largest first), and track counts grouped by
    ``metadata_status`` and by ``source``.

    The ``SUM`` results are passed through ``int()`` before they are stored:
    ``LibraryStats`` declares those fields as ``int``, but PostgreSQL widens
    ``SUM`` (``bigint`` → ``numeric``, float columns stay float), so the
    driver may return ``Decimal`` / ``float`` values.
    NOTE(review): the column types of ``file_size`` / ``duration_seconds``
    are not visible here — confirm; ``int()`` truncates fractional seconds.

    Defined before ``list`` for the same shadowing reason as ``genres``.
    """
    (
        track_count,
        size_sum,
        duration_sum,
        largest_size,
        earliest,
        latest,
    ) = (
        await self._session.execute(
            select(
                func.count(TrackModel.id),
                # COALESCE keeps the sums / max at 0 when there are no rows.
                func.coalesce(func.sum(TrackModel.file_size), 0),
                func.coalesce(func.sum(TrackModel.duration_seconds), 0),
                func.coalesce(func.max(TrackModel.file_size), 0),
                func.min(TrackModel.created_at),
                func.max(TrackModel.created_at),
            )
        )
    ).one()

    fmt_rows = (
        await self._session.execute(
            select(
                TrackModel.file_format,
                func.count(TrackModel.id),
                func.coalesce(func.sum(TrackModel.file_size), 0),
            )
            .group_by(TrackModel.file_format)
            .order_by(func.sum(TrackModel.file_size).desc())
        )
    ).all()

    status_rows = (
        await self._session.execute(
            select(TrackModel.metadata_status, func.count(TrackModel.id)).group_by(
                TrackModel.metadata_status
            )
        )
    ).all()

    source_rows = (
        await self._session.execute(
            select(TrackModel.source, func.count(TrackModel.id)).group_by(TrackModel.source)
        )
    ).all()

    return LibraryStats(
        total_tracks=track_count,
        total_size=int(size_sum),
        total_duration_seconds=int(duration_sum),
        largest_track_size=largest_size,
        earliest_added=earliest,
        latest_added=latest,
        by_format=[
            FormatBreakdown(file_format=fmt, track_count=cnt, total_size=int(size))
            for fmt, cnt, size in fmt_rows
        ],
        by_metadata_status={status: cnt for status, cnt in status_rows},
        by_source={source: cnt for source, cnt in source_rows},
    )
async def list( async def list(
self, self,
*, *,
+10 -1
View File
@@ -8,7 +8,7 @@ from pathlib import Path
import anyio import anyio
from app.domain.entities.storage import ObjectStat from app.domain.entities.storage import DiskUsage, ObjectStat
from app.domain.errors import StorageError from app.domain.errors import StorageError
_EXT_CONTENT_TYPE: dict[str, str] = { _EXT_CONTENT_TYPE: dict[str, str] = {
@@ -78,6 +78,15 @@ class LocalFileStorage:
async def delete(self, key: str) -> None: async def delete(self, key: str) -> None:
(self._media_path / key).unlink(missing_ok=True) (self._media_path / key).unlink(missing_ok=True)
async def disk_usage(self) -> DiskUsage | None:
    """Report the capacity of the volume that holds the media root.

    The media root may not exist yet on a fresh instance, so the probe walks
    up to the nearest existing ancestor and measures that path instead — the
    underlying volume is still reported.

    The whole probe (the ``exists()`` walk as well as ``shutil.disk_usage``)
    runs in a worker thread so no blocking filesystem call is made on the
    event loop.
    """

    def _probe() -> DiskUsage:
        path = self._media_path
        # Stop at the first existing directory, or at the root of the path
        # (where ``path.parent`` is the path itself).
        while not path.exists() and path != path.parent:
            path = path.parent
        usage = shutil.disk_usage(str(path))
        return DiskUsage(total=usage.total, used=usage.used, free=usage.free)

    return await anyio.to_thread.run_sync(_probe)
def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]: def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]:
return self._as_local_path_cm(key) return self._as_local_path_cm(key)
+4
View File
@@ -127,6 +127,10 @@ class S3FileStorage:
except ClientError as exc: except ClientError as exc:
raise StorageError(str(exc)) from exc raise StorageError(str(exc)) from exc
async def disk_usage(self) -> None:
    """Always return ``None``: an object store has no fixed-capacity volume."""
    return None
def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]: def as_local_path(self, key: str) -> AbstractAsyncContextManager[Path]:
return self._as_local_path_cm(key) return self._as_local_path_cm(key)
+138
View File
@@ -0,0 +1,138 @@
"""Integration tests for the storage statistics endpoint (§A6).
Requires a reachable Postgres; skips otherwise.
"""
import asyncio
import os
from collections.abc import AsyncIterator
from pathlib import Path
import pytest
from app.core.config import get_settings
from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope
from app.infrastructure.db.repositories import (
SqlAlchemyRefreshTokenRepository,
SqlAlchemyUserRepository,
)
from asgi_lifespan import LifespanManager
from httpx import ASGITransport, AsyncClient
pytestmark = pytest.mark.asyncio
# Memoised result of the connectivity probe; ``None`` until the first check.
_db_reachable_cache: bool | None = None


async def _db_reachable() -> bool:
    """Tell whether the database answers ``SELECT 1`` within three seconds.

    The outcome of the first probe is cached in ``_db_reachable_cache`` and
    returned by every later call without reconnecting.
    """
    global _db_reachable_cache
    if _db_reachable_cache is None:
        from sqlalchemy import text

        try:
            async with asyncio.timeout(3):
                async with get_engine().connect() as conn:
                    await conn.execute(text("SELECT 1"))
        except Exception:
            # Any failure (including the timeout) counts as "not reachable".
            _db_reachable_cache = False
        else:
            _db_reachable_cache = True
    return _db_reachable_cache
@pytest.fixture
async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a freshly built app and an empty schema.

    Skips the test when the database is unreachable. Otherwise the fixture
    points ``MEDIA_PATH`` at ``tmp_path``, recreates all tables, creates the
    ``testuser`` account and serves the app through an in-process ASGI
    transport. On teardown the tables are dropped, the engine is disposed
    and ``MEDIA_PATH`` is put back to whatever it was before the test
    (removed again only if it was not set).
    """
    if not await _db_reachable():
        pytest.skip("Postgres not reachable — integration test skipped.")

    # Remember any pre-existing value so teardown restores it rather than
    # unconditionally deleting the variable for the rest of the session.
    previous_media_path = os.environ.get("MEDIA_PATH")
    os.environ["MEDIA_PATH"] = str(tmp_path)
    get_settings.cache_clear()

    import app.infrastructure.storage.provider as _storage_provider

    # Presumably drops the cached storage instance so the next lookup is
    # rebuilt from the new settings — verify against the provider module.
    _storage_provider._storage = None

    try:
        # Start from a clean schema.
        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
            await conn.run_sync(Base.metadata.create_all)

        from app.application.user_service import UserService
        from app.core.security import Argon2PasswordHasher

        async with session_scope() as session:
            await UserService(
                users=SqlAlchemyUserRepository(session),
                refresh_tokens=SqlAlchemyRefreshTokenRepository(session),
                hasher=Argon2PasswordHasher(),
            ).create_user(username="testuser", password="testpass1", is_superuser=False)

        from app.main import create_app

        app = create_app()
        async with LifespanManager(app):
            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                yield client

        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
        await dispose_engine()
    finally:
        _storage_provider._storage = None
        if previous_media_path is None:
            os.environ.pop("MEDIA_PATH", None)
        else:
            os.environ["MEDIA_PATH"] = previous_media_path
        get_settings.cache_clear()
async def _login(api: AsyncClient) -> str:
    """Log in as the fixture's ``testuser`` and return the access token."""
    credentials = {"username": "testuser", "password": "testpass1"}
    response = await api.post("/api/v1/auth/login", json=credentials)
    assert response.status_code == 200
    payload = response.json()
    return str(payload["access_token"])
async def _upload(api: AsyncClient, token: str, *, name: str) -> None:
    """Upload a small fake audio file called ``name`` and assert a 200 reply."""
    # The payload embeds the file name so each upload has different bytes;
    # dedup (by content) therefore keeps the files distinct.
    payload = f"fake audio bytes for storage stats test {name}".encode() * 10
    auth_headers = {"Authorization": f"Bearer {token}"}
    form_files = {"file": (name, payload, "audio/mpeg")}
    response = await api.post("/api/v1/upload", files=form_files, headers=auth_headers)
    assert response.status_code == 200, response.text
async def test_storage_stats_requires_auth(api: AsyncClient) -> None:
    """A request without credentials is answered with 401."""
    response = await api.get("/api/v1/storage")
    assert response.status_code == 401
async def test_storage_stats_empty_library(api: AsyncClient) -> None:
    """With no uploads the totals are zero but the disk is still reported."""
    token = await _login(api)
    auth_headers = {"Authorization": f"Bearer {token}"}

    response = await api.get("/api/v1/storage", headers=auth_headers)
    assert response.status_code == 200, response.text

    stats = response.json()
    assert stats["total_tracks"] == 0
    assert stats["total_size"] == 0
    assert stats["by_format"] == []

    # Local backend reports a real disk in the test environment.
    disk = stats["disk"]
    assert disk is not None
    assert disk["total"] > 0
async def test_storage_stats_counts_uploads(api: AsyncClient) -> None:
    """Two uploaded files show up in the totals and in the breakdowns."""
    token = await _login(api)
    for filename in ("one.mp3", "two.mp3"):
        await _upload(api, token, name=filename)

    auth_headers = {"Authorization": f"Bearer {token}"}
    response = await api.get("/api/v1/storage", headers=auth_headers)
    assert response.status_code == 200, response.text

    stats = response.json()
    assert stats["total_tracks"] == 2
    assert stats["total_size"] > 0
    assert stats["total_artists"] >= 1

    # Index the per-format rows by format name for direct lookup.
    formats = {entry["file_format"]: entry for entry in stats["by_format"]}
    assert "mp3" in formats
    assert formats["mp3"]["track_count"] == 2
    assert sum(stats["by_source"].values()) == 2