class DiskUsageOut(BaseModel):
    """Capacity of the volume backing the media store, as exposed by the API.

    Mirrors the three fields of the domain ``DiskUsage`` value object.
    """

    # The local backend fills these from ``shutil.disk_usage``, i.e. bytes.
    total: int
    used: int
    free: int


class FormatBreakdownOut(BaseModel):
    """One per-``file_format`` slice of the library."""

    file_format: str
    # Number of tracks stored in this format.
    track_count: int
    # Sum of the recorded ``file_size`` of those tracks.
    total_size: int
@router.get("")
async def get_storage_stats(
    track_repo: TrackRepoDep,
    artist_repo: ArtistRepoDep,
    album_repo: AlbumRepoDep,
    storage: FileStorageDep,
    _: CurrentUser,
) -> StorageStatsOut:
    """Library + disk statistics for the Storage dashboard (§A6).

    Catalogue aggregates come from the repositories; ``disk`` reflects the
    backing volume and is ``None`` when the storage backend reports no
    fixed-capacity disk (``FileStorage.disk_usage()`` returned ``None``).

    The ``CurrentUser`` dependency is unused in the body; it is declared only
    so the route requires an authenticated caller.
    """
    stats = await track_repo.library_stats()
    total_artists = await artist_repo.count(q=None)
    total_albums = await album_repo.count(artist_id=None, q=None)
    genres = await track_repo.genres()
    disk = await storage.disk_usage()

    # The ``genres()`` port only promises ``list[tuple[str, int]]`` — rank here
    # so "top" does not hinge on the repository's row order. ``sorted`` is
    # stable (also with ``reverse=True``), so an already-ranked list keeps its
    # exact order, ties included.
    ranked_genres = sorted(genres, key=lambda pair: pair[1], reverse=True)

    return StorageStatsOut(
        total_tracks=stats.total_tracks,
        total_artists=total_artists,
        total_albums=total_albums,
        total_size=stats.total_size,
        total_duration_seconds=stats.total_duration_seconds,
        largest_track_size=stats.largest_track_size,
        earliest_added=stats.earliest_added,
        latest_added=stats.latest_added,
        by_format=[
            FormatBreakdownOut(
                file_format=f.file_format,
                track_count=f.track_count,
                total_size=f.total_size,
            )
            for f in stats.by_format
        ],
        by_metadata_status=stats.by_metadata_status,
        by_source=stats.by_source,
        top_genres=[
            GenreCountOut(genre=genre, track_count=count)
            for genre, count in ranked_genres[:_TOP_GENRES]
        ],
        # Explicit ``None`` check: absence of a disk is signalled by ``None``,
        # not by falsiness of the value object.
        disk=(
            DiskUsageOut(total=disk.total, used=disk.used, free=disk.free)
            if disk is not None
            else None
        ),
    )
@dataclass(frozen=True, slots=True)
class DiskUsage:
    """Capacity of the volume backing the media store.

    Backends that expose no notion of total disk capacity (e.g. object
    stores) do not build this object; their ``disk_usage()`` returns ``None``
    instead.
    """

    # The local backend fills these from ``shutil.disk_usage``, i.e. bytes.
    total: int
    used: int
    free: int


@dataclass(frozen=True, slots=True)
class FormatBreakdown:
    """Per-container-format slice of the library (e.g. ``flac`` → 312 tracks)."""

    file_format: str
    # Number of tracks stored in this format.
    track_count: int
    # Sum of the recorded ``file_size`` of those tracks.
    total_size: int


@dataclass(frozen=True, slots=True)
class LibraryStats:
    """Aggregate facts about everything the instance has stored.

    Computed from the catalogue (DB), not the filesystem — ``total_size`` is
    the sum of the recorded ``file_size`` of every track.
    """

    total_tracks: int
    total_size: int
    # Sum of the recorded ``duration_seconds`` of every track.
    total_duration_seconds: int
    # One entry per distinct ``file_format``.
    by_format: list[FormatBreakdown]
    # Track counts keyed by ``metadata_status`` / ``source`` respectively.
    by_metadata_status: dict[str, int]
    by_source: dict[str, int]
    # Largest recorded ``file_size``; the repository reports 0 for an empty library.
    largest_track_size: int
    # Min / max ``created_at`` over all tracks; ``None`` when there are no tracks.
    earliest_added: dt.datetime | None
    latest_added: dt.datetime | None
async def library_stats(self) -> LibraryStats:
    """Return catalogue-wide aggregates (no pagination).

    Runs four queries on the repository session: overall totals, then one
    GROUP BY each on ``file_format``, ``metadata_status`` and ``source``.
    Defined before ``list`` for the same shadowing reason as ``genres``.

    Returns:
        A ``LibraryStats`` whose numeric totals are plain ``int`` values and
        whose ``by_format`` entries are ordered by summed size, largest first.
    """
    # Labelled once so the per-format SELECT and its ORDER BY use the same
    # NULL-safe expression. Ordering by the raw SUM would put formats whose
    # sizes are all NULL first, because PostgreSQL sorts NULLs first under DESC.
    format_size = func.coalesce(func.sum(TrackModel.file_size), 0).label("total_size")

    totals = (
        await self._session.execute(
            select(
                func.count(TrackModel.id),
                func.coalesce(func.sum(TrackModel.file_size), 0),
                func.coalesce(func.sum(TrackModel.duration_seconds), 0),
                func.coalesce(func.max(TrackModel.file_size), 0),
                func.min(TrackModel.created_at),
                func.max(TrackModel.created_at),
            )
        )
    ).one()
    track_count, size_sum, duration_sum, largest, earliest, latest = totals

    fmt_rows = (
        await self._session.execute(
            select(
                TrackModel.file_format,
                func.count(TrackModel.id),
                format_size,
            )
            .group_by(TrackModel.file_format)
            .order_by(format_size.desc())
        )
    ).all()

    status_rows = (
        await self._session.execute(
            select(TrackModel.metadata_status, func.count(TrackModel.id)).group_by(
                TrackModel.metadata_status
            )
        )
    ).all()

    source_rows = (
        await self._session.execute(
            select(TrackModel.source, func.count(TrackModel.id)).group_by(TrackModel.source)
        )
    ).all()

    # PostgreSQL widens SUM results (bigint → numeric, real → double
    # precision), so the driver may hand back ``Decimal`` or ``float``.
    # Coerce so the values honour the ``int`` fields of the value objects.
    return LibraryStats(
        total_tracks=track_count,
        total_size=int(size_sum),
        total_duration_seconds=int(duration_sum),
        largest_track_size=int(largest),
        earliest_added=earliest,
        latest_added=latest,
        by_format=[
            FormatBreakdown(file_format=fmt, track_count=cnt, total_size=int(size))
            for fmt, cnt, size in fmt_rows
        ],
        by_metadata_status={status: cnt for status, cnt in status_rows},
        by_source={source: cnt for source, cnt in source_rows},
    )
async def disk_usage(self) -> DiskUsage | None:
    """Report the capacity of the volume that holds the media root.

    Returns:
        A ``DiskUsage`` built from ``shutil.disk_usage`` (total / used / free).
        This backend always returns a value; ``None`` is in the signature only
        to match the ``FileStorage`` port.

    Raises:
        OSError: propagated from ``shutil.disk_usage`` if the probed path
            cannot be queried.
    """

    def _probe() -> DiskUsage:
        # The media root may not exist yet on a fresh instance — walk up to the
        # nearest existing ancestor so we still report the underlying volume.
        # ``path == path.parent`` is the filesystem root, which ends the walk.
        path = self._media_path
        while not path.exists() and path != path.parent:
            path = path.parent
        usage = shutil.disk_usage(str(path))
        return DiskUsage(total=usage.total, used=usage.used, free=usage.free)

    # The ``exists()`` checks are blocking filesystem calls just like the
    # final ``disk_usage``; run the whole probe in a worker thread so none of
    # it stalls the event loop.
    return await anyio.to_thread.run_sync(_probe)
@pytest.fixture
async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a fresh app instance and database schema.

    Skips the test when Postgres is unreachable. Otherwise it points
    ``MEDIA_PATH`` at ``tmp_path``, resets the cached settings and storage
    singleton, recreates all tables, seeds ``testuser`` and runs the app under
    its lifespan. On exit the storage singleton, ``MEDIA_PATH`` and the
    settings cache are restored.
    """
    if not await _db_reachable():
        pytest.skip("Postgres not reachable — integration test skipped.")

    # Remember any pre-existing value so teardown restores it instead of
    # unconditionally removing the variable for the rest of the session.
    previous_media_path = os.environ.get("MEDIA_PATH")
    os.environ["MEDIA_PATH"] = str(tmp_path)
    get_settings.cache_clear()

    import app.infrastructure.storage.provider as _storage_provider

    # Drop the cached storage so the app rebuilds it from the new settings.
    _storage_provider._storage = None

    try:
        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
            await conn.run_sync(Base.metadata.create_all)

        from app.application.user_service import UserService
        from app.core.security import Argon2PasswordHasher

        async with session_scope() as session:
            await UserService(
                users=SqlAlchemyUserRepository(session),
                refresh_tokens=SqlAlchemyRefreshTokenRepository(session),
                hasher=Argon2PasswordHasher(),
            ).create_user(username="testuser", password="testpass1", is_superuser=False)

        from app.main import create_app

        app = create_app()
        async with LifespanManager(app):
            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                yield client

        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
        await dispose_engine()
    finally:
        _storage_provider._storage = None
        if previous_media_path is None:
            os.environ.pop("MEDIA_PATH", None)
        else:
            os.environ["MEDIA_PATH"] = previous_media_path
        get_settings.cache_clear()
async def test_storage_stats_counts_uploads(api: AsyncClient) -> None:
    """Two distinct uploads are reflected in totals and in the breakdowns."""
    token = await _login(api)
    for filename in ("one.mp3", "two.mp3"):
        await _upload(api, token, name=filename)

    auth_headers = {"Authorization": f"Bearer {token}"}
    resp = await api.get("/api/v1/storage", headers=auth_headers)
    assert resp.status_code == 200, resp.text

    body = resp.json()
    assert body["total_tracks"] == 2
    assert body["total_size"] > 0
    assert body["total_artists"] >= 1

    # Index the per-format list by format name for direct lookup.
    by_format = {entry["file_format"]: entry for entry in body["by_format"]}
    assert "mp3" in by_format
    assert by_format["mp3"]["track_count"] == 2
    assert sum(body["by_source"].values()) == 2