diff --git a/app/api/covers.py b/app/api/covers.py new file mode 100644 index 0000000..c14a246 --- /dev/null +++ b/app/api/covers.py @@ -0,0 +1,57 @@ +"""Shared cover-art serving helper (presentation). + +Streams a stored cover image from the :class:`FileStorage` port. Used by the +native ``/api/v1`` cover endpoints and the Subsonic ``getCoverArt`` adapter so +the streaming/content-type logic lives in one place. +""" + +import uuid + +from fastapi.responses import StreamingResponse + +from app.domain.entities.album import Album +from app.domain.errors import NotFoundError, StorageError +from app.domain.ports import AlbumRepository, FileStorage, TrackRepository + +_CONTENT_TYPE_BY_EXT: dict[str, str] = { + "jpg": "image/jpeg", + "jpeg": "image/jpeg", + "png": "image/png", + "webp": "image/webp", + "gif": "image/gif", +} + +# Covers are immutable for a given album (a new cover means a new key), so let +# clients cache aggressively. +_CACHE_CONTROL = "public, max-age=86400" + + +def _content_type_for(key: str) -> str: + ext = key.rsplit(".", 1)[-1].lower() if "." in key else "" + return _CONTENT_TYPE_BY_EXT.get(ext, "application/octet-stream") + + +async def stream_cover(storage: FileStorage, cover_path: str) -> StreamingResponse: + """Stream a stored cover by its storage key. Raises ``NotFoundError`` if the + object is missing (a dangling ``cover_path`` reads as "no cover").""" + try: + stream, total = await storage.open_range(cover_path, 0, None) + except StorageError as exc: + raise NotFoundError("Cover not found.") from exc + return StreamingResponse( + stream, + media_type=_content_type_for(cover_path), + headers={"Content-Length": str(total), "Cache-Control": _CACHE_CONTROL}, + ) + + +async def resolve_album_for_track( + track_repo: TrackRepository, + album_repo: AlbumRepository, + track_id: uuid.UUID, +) -> Album | None: + """The album that owns a track (cover lives on the album), or ``None``.""" + track = await track_repo.get_by_id(track_id) + if track is None or track.album_id is None: + return None + return await album_repo.get_by_id(track.album_id) diff --git a/app/api/rest/media.py b/app/api/rest/media.py index 4fcb974..ba439c5 100644 --- a/app/api/rest/media.py +++ b/app/api/rest/media.py @@ -13,9 +13,17 @@ from typing import Annotated from fastapi import APIRouter, Header, Query from fastapi.responses import Response, StreamingResponse -from app.api.deps import StreamingServiceDep, SubsonicUser, TrackRepoDep -from app.api.rest.ids import decode_track, parse -from app.domain.errors import NotFoundError +from app.api.covers import resolve_album_for_track, stream_cover +from app.api.deps import ( + AlbumRepoDep, + FileStorageDep, + StreamingServiceDep, + SubsonicUser, + TrackRepoDep, +) +from app.api.rest.ids import IdKind, decode_track, parse +from app.domain.entities.album import Album +from app.domain.errors import NotFoundError, StorageError router = APIRouter() @@ -69,10 +77,28 @@ async def download( @router.api_route("/getCoverArt.view", methods=["GET", "POST"]) async def get_cover_art( _user: SubsonicUser, + album_repo: AlbumRepoDep, + track_repo: TrackRepoDep, + storage: FileStorageDep, id: Annotated[str, Query()], size: Annotated[int | None, Query()] = None, ) -> Response: - # Validate the id shape so clients get a clean error on garbage, then serve a - # placeholder. TODO: stream real covers once the cover pipeline exists. - parse(id) + # Cover ids reuse the entity id: ``al-`` (album) or ``tr-`` + # (track → its album). Unlike the native API, Subsonic clients expect an + # image either way, so a missing cover falls back to a placeholder rather + # than 404. ``size`` is accepted but ignored (we serve the stored image). + kind, value = parse(id) + album: Album | None + if kind is IdKind.ALBUM: + album = await album_repo.get_by_id(value) + elif kind is IdKind.TRACK: + album = await resolve_album_for_track(track_repo, album_repo, value) + else: + album = None + + if album is not None and album.cover_path: + try: + return await stream_cover(storage, album.cover_path) + except NotFoundError, StorageError: + pass return Response(content=_PLACEHOLDER_PNG, media_type="image/png") diff --git a/app/api/schemas/album.py b/app/api/schemas/album.py index 833d6c5..e03adf3 100644 --- a/app/api/schemas/album.py +++ b/app/api/schemas/album.py @@ -13,4 +13,5 @@ class AlbumOut(BaseModel): artist_name: str year: int | None track_count: int + has_cover: bool created_at: dt.datetime diff --git a/app/api/schemas/track.py b/app/api/schemas/track.py index 0dbd5a5..3a4c9bc 100644 --- a/app/api/schemas/track.py +++ b/app/api/schemas/track.py @@ -18,6 +18,7 @@ class TrackOut(BaseModel): file_size: int metadata_status: str source: str + has_cover: bool created_at: dt.datetime diff --git a/app/api/v1/albums.py b/app/api/v1/albums.py index 27a3ff3..47bea9a 100644 --- a/app/api/v1/albums.py +++ b/app/api/v1/albums.py @@ -1,11 +1,19 @@ """Album endpoints.""" import uuid -from typing import Any from fastapi import APIRouter, Query +from fastapi.responses import StreamingResponse -from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, TrackRepoDep +from app.api.covers import stream_cover +from app.api.deps import ( + AlbumRepoDep, + ArtistRepoDep, + CurrentUser, + FileStorageDep, + StreamUser, + TrackRepoDep, +) from app.api.schemas.album import AlbumOut from app.api.schemas.pagination import PagedResponse from app.api.schemas.track import TrackOut @@ -30,6 +38,7 @@ async def _build_album_out( artist_name=artists[a.artist_id].name if a.artist_id in artists else "Unknown Artist", year=a.year, track_count=track_counts.get(a.id, 0), + has_cover=bool(a.cover_path), created_at=a.created_at, ) for a in albums @@ -109,4 +118,14 @@ async def get_album_tracks( @router.get("/{album_id}/cover") -async def get_album_cover(album_id: uuid.UUID, _: CurrentUser) -> Any: ... +async def get_album_cover( + album_id: uuid.UUID, + album_repo: AlbumRepoDep, + storage: FileStorageDep, + _: StreamUser, +) -> StreamingResponse: + # ```` can't send a bearer header → StreamUser accepts ``?token=``. + album = await album_repo.get_by_id(album_id) + if album is None or not album.cover_path: + raise NotFoundError("Cover not found.") + return await stream_cover(storage, album.cover_path) diff --git a/app/api/v1/tracks.py b/app/api/v1/tracks.py index dc4c655..efa49d7 100644 --- a/app/api/v1/tracks.py +++ b/app/api/v1/tracks.py @@ -4,8 +4,17 @@ import uuid from typing import Any from fastapi import APIRouter, Query, Response +from fastapi.responses import StreamingResponse -from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, FileStorageDep, TrackRepoDep +from app.api.covers import resolve_album_for_track, stream_cover +from app.api.deps import ( + AlbumRepoDep, + ArtistRepoDep, + CurrentUser, + FileStorageDep, + StreamUser, + TrackRepoDep, +) from app.api.schemas.pagination import PagedResponse from app.api.schemas.track import TrackOut, TrackUpdate from app.domain.entities.album import Album @@ -34,6 +43,7 @@ async def _build_track_out( file_size=t.file_size, metadata_status=t.metadata_status, source=t.source, + has_cover=bool(t.album_id and albums.get(t.album_id) and albums[t.album_id].cover_path), created_at=t.created_at, ) for t in tracks @@ -144,7 +154,19 @@ async def optimize_track(track_id: uuid.UUID, _: CurrentUser) -> Any: ... @router.get("/{track_id}/cover") -async def get_track_cover(track_id: uuid.UUID, _: CurrentUser) -> Any: ... +async def get_track_cover( + track_id: uuid.UUID, + track_repo: TrackRepoDep, + album_repo: AlbumRepoDep, + storage: FileStorageDep, + _: StreamUser, +) -> StreamingResponse: + # A track's cover is its album's cover. ```` can't send a bearer + # header → StreamUser accepts ``?token=``. + album = await resolve_album_for_track(track_repo, album_repo, track_id) + if album is None or not album.cover_path: + raise NotFoundError("Cover not found.") + return await stream_cover(storage, album.cover_path) @router.post("/{track_id}/metadata/enrich") diff --git a/app/application/metadata_service.py b/app/application/metadata_service.py index c58ddd6..707fb85 100644 --- a/app/application/metadata_service.py +++ b/app/application/metadata_service.py @@ -12,10 +12,14 @@ Invariants (plan §6.2, CLAUDE.md): - **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases. """ +import tempfile import uuid from dataclasses import dataclass +from pathlib import Path from app.core.logging import get_logger +from app.domain.entities.album import Album +from app.domain.entities.cover import CoverArt from app.domain.entities.metadata import AudioTags, RecordingMatch from app.domain.ports import ( AcoustIdClient, @@ -23,6 +27,8 @@ from app.domain.ports import ( ArtistRepository, AudioFingerprinter, AudioTagReader, + CoverArtExtractor, + CoverArtProvider, FileStorage, TrackRepository, ) @@ -50,6 +56,8 @@ class MetadataEnrichmentService: tag_reader: AudioTagReader, fingerprinter: AudioFingerprinter, acoustid: AcoustIdClient, + cover_extractor: CoverArtExtractor | None = None, + cover_provider: CoverArtProvider | None = None, ) -> None: self._tracks = tracks self._artists = artists @@ -58,6 +66,8 @@ class MetadataEnrichmentService: self._tag_reader = tag_reader self._fingerprinter = fingerprinter self._acoustid = acoustid + self._cover_extractor = cover_extractor + self._cover_provider = cover_provider async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult: track = await self._tracks.get_by_id(track_id) @@ -92,7 +102,15 @@ class MetadataEnrichmentService: acoustid_id = match.acoustid if match else None artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id) - album_id = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid) + album = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid) + album_id = album.id if album is not None else None + + if album is not None: + await self._resolve_cover( + album, + storage_uri=track.storage_uri, + release_group_mbid=match.release_group_mbid if match else None, + ) identified = bool(artist_name) or album_id is not None or mbid is not None status = "enriched" if identified else "failed" @@ -148,16 +166,71 @@ class MetadataEnrichmentService: artist_id: uuid.UUID, year: int | None, mbid: str | None, - ) -> uuid.UUID | None: + ) -> Album | None: if not title: return None - album = await self._albums.get_or_create( + return await self._albums.get_or_create( title=title, artist_id=artist_id, year=year, musicbrainz_id=mbid, ) - return album.id + + async def _resolve_cover( + self, + album: Album, + *, + storage_uri: str, + release_group_mbid: str | None, + ) -> None: + """Fill in an album cover when it has none. Source order mirrors the + tag-first pipeline: embedded artwork (offline) → Cover Art Archive + (network, by release-group). Best-effort — any failure is swallowed so a + missing cover never affects enrichment status.""" + if album.cover_path: + return # already has one — never overwrite (idempotent) + + cover = await self._extract_cover(storage_uri) + if cover is None: + cover = await self._fetch_cover(release_group_mbid) + if cover is None: + return + + try: + key = await self._save_cover(album.id, cover) + await self._albums.set_cover_path(album.id, key) + log.info("cover_resolved", album_id=str(album.id), content_type=cover.content_type) + except Exception: + log.warning("cover_save_failed", album_id=str(album.id)) + + async def _extract_cover(self, storage_uri: str) -> CoverArt | None: + if self._cover_extractor is None: + return None + try: + async with self._storage.as_local_path(storage_uri) as path: + return await self._cover_extractor.extract(path) + except Exception: + log.warning("cover_extract_step_failed", storage_uri=storage_uri) + return None + + async def _fetch_cover(self, release_group_mbid: str | None) -> CoverArt | None: + if self._cover_provider is None or not release_group_mbid: + return None + if not self._cover_provider.is_available(): + return None + try: + return await self._cover_provider.fetch_release_group(release_group_mbid) + except Exception: + log.warning("cover_fetch_step_failed", release_group=release_group_mbid) + return None + + async def _save_cover(self, album_id: uuid.UUID, cover: CoverArt) -> str: + key = f"covers/{album_id}.{cover.extension}" + with tempfile.NamedTemporaryFile(suffix=f".{cover.extension}") as tmp: + tmp.write(cover.data) + tmp.flush() + await self._storage.save_file(key, Path(tmp.name)) + return key def _opt_str(*values: str | None) -> str | None: diff --git a/app/core/config.py b/app/core/config.py index 729ae45..0ccb749 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -103,6 +103,11 @@ class Settings(BaseSettings): # image installs it via libchromaprint-tools. fpcalc_path: str = "fpcalc" + # Cover Art Archive — network fallback for album covers (after embedded art). + # Disable to keep enrichment fully offline; embedded artwork still works. + coverart_enabled: bool = True + coverart_base_url: str = "https://coverartarchive.org" + @field_validator("database_url") @classmethod def _require_async_driver(cls, v: str) -> str: diff --git a/app/domain/entities/__init__.py b/app/domain/entities/__init__.py index f4e2814..6003713 100644 --- a/app/domain/entities/__init__.py +++ b/app/domain/entities/__init__.py @@ -1,6 +1,7 @@ """Domain entities and value objects — pure, framework-free.""" from app.domain.entities.album import Album +from app.domain.entities.cover import CoverArt from app.domain.entities.history import PlayHistoryEntry from app.domain.entities.like import Like from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch @@ -13,6 +14,7 @@ __all__ = [ "Album", "Artist", "AudioTags", + "CoverArt", "Credentials", "Fingerprint", "Like", diff --git a/app/domain/entities/cover.py b/app/domain/entities/cover.py new file mode 100644 index 0000000..6a93fdf --- /dev/null +++ b/app/domain/entities/cover.py @@ -0,0 +1,28 @@ +"""Cover-art value object — raw image bytes plus their MIME type. + +Crosses the domain boundary between the cover sources (embedded extractor, +Cover Art Archive) and the storage/serving layers. The bytes are the encoded +image as-is; we never decode/resize in Phase 1. +""" + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class CoverArt: + data: bytes + content_type: str # "image/jpeg" | "image/png" | … + + @property + def extension(self) -> str: + """File extension for the content type (no leading dot).""" + return _EXT_BY_TYPE.get(self.content_type.lower(), "jpg") + + +_EXT_BY_TYPE: dict[str, str] = { + "image/jpeg": "jpg", + "image/jpg": "jpg", + "image/png": "png", + "image/webp": "webp", + "image/gif": "gif", +} diff --git a/app/domain/entities/metadata.py b/app/domain/entities/metadata.py index 9f622db..e5a1f1f 100644 --- a/app/domain/entities/metadata.py +++ b/app/domain/entities/metadata.py @@ -47,6 +47,7 @@ class RecordingMatch: acoustid: str score: float recording_mbid: str | None = None + release_group_mbid: str | None = None title: str | None = None artist: str | None = None album: str | None = None diff --git a/app/domain/ports.py b/app/domain/ports.py index 38bf2eb..72fdb0b 100644 --- a/app/domain/ports.py +++ b/app/domain/ports.py @@ -15,6 +15,7 @@ from typing import Protocol from app.domain.entities import ( Album, AudioTags, + CoverArt, Credentials, Fingerprint, Like, @@ -188,6 +189,7 @@ class AlbumRepository(Protocol): year: int | None, musicbrainz_id: str | None, ) -> Album: ... + async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: ... async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ... async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ... async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ... @@ -297,3 +299,19 @@ class AcoustIdClient(Protocol): def is_available(self) -> bool: ... async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: ... + + +class CoverArtExtractor(Protocol): + """Pulls embedded cover art out of a local audio file (offline, no network). + Returns ``None`` when the file has no picture or can't be parsed — never raises.""" + + async def extract(self, path: Path) -> CoverArt | None: ... + + +class CoverArtProvider(Protocol): + """Fetches cover art from an external service (Cover Art Archive) by + MusicBrainz release-group id. ``is_available`` may gate it off; ``fetch`` + returns ``None`` (not found / service down), never raising.""" + + def is_available(self) -> bool: ... + async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None: ... diff --git a/app/infrastructure/db/repositories/album_repository.py b/app/infrastructure/db/repositories/album_repository.py index 1d49530..f34e980 100644 --- a/app/infrastructure/db/repositories/album_repository.py +++ b/app/infrastructure/db/repositories/album_repository.py @@ -63,6 +63,12 @@ class SqlAlchemyAlbumRepository: await self._session.refresh(row) return _to_entity(row) + async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: + row = await self._session.get(AlbumModel, album_id) + if row is not None: + row.cover_path = cover_path + await self._session.flush() + async def get_by_id(self, album_id: uuid.UUID) -> Album | None: row = await self._session.get(AlbumModel, album_id) return _to_entity(row) if row is not None else None diff --git a/app/infrastructure/metadata/acoustid.py b/app/infrastructure/metadata/acoustid.py index cde2da6..b501acc 100644 --- a/app/infrastructure/metadata/acoustid.py +++ b/app/infrastructure/metadata/acoustid.py @@ -66,7 +66,7 @@ class AcoustIdHttpClient: ) resp.raise_for_status() payload = resp.json() - except (httpx.HTTPError, ValueError): + except httpx.HTTPError, ValueError: log.warning("acoustid_lookup_failed") return None @@ -100,6 +100,7 @@ def _parse_best_match(payload: object) -> RecordingMatch | None: score = float(best.get("score", 0.0)) recording_mbid: str | None = None + release_group_mbid: str | None = None title: str | None = None artist: str | None = None album: str | None = None @@ -115,13 +116,17 @@ def _parse_best_match(payload: object) -> RecordingMatch | None: artist = name if isinstance(name, str) else None groups = rec.get("releasegroups") if isinstance(groups, list) and groups and isinstance(groups[0], dict): - gtitle = groups[0].get("title") + group = groups[0] + gtitle = group.get("title") album = gtitle if isinstance(gtitle, str) else None + gid = group.get("id") + release_group_mbid = gid if isinstance(gid, str) else None return RecordingMatch( acoustid=acoustid, score=score, recording_mbid=recording_mbid, + release_group_mbid=release_group_mbid, title=title, artist=artist, album=album, diff --git a/app/infrastructure/metadata/cover_extractor.py b/app/infrastructure/metadata/cover_extractor.py new file mode 100644 index 0000000..863aac4 --- /dev/null +++ b/app/infrastructure/metadata/cover_extractor.py @@ -0,0 +1,111 @@ +"""MutagenCoverExtractor — pulls embedded cover art from a local audio file. + +The offline-first cover source (mirrors the tag pre-pass): a well-tagged file +often already carries front-cover artwork (ID3 ``APIC``, FLAC/OGG picture +blocks, MP4 ``covr``). We read it without any network call. Parsing is blocking, +so it runs in a worker thread. Any failure degrades to ``None`` — never raises. + +mutagen ships no type stubs, so its objects are handled as ``Any`` and accessed +defensively (``getattr``) — the format zoo doesn't fit one static shape anyway. +""" + +import base64 +from pathlib import Path +from typing import Any + +import anyio +from mutagen import File as MutagenFile # type: ignore[attr-defined] +from mutagen.flac import Picture +from mutagen.mp4 import MP4Cover + +from app.core.logging import get_logger +from app.domain.entities.cover import CoverArt + +log = get_logger(__name__) + +# MP4 cover format flag → MIME (mutagen exposes an int, not a content type). +_MP4_FORMATS: dict[int, str] = { + MP4Cover.FORMAT_JPEG: "image/jpeg", + MP4Cover.FORMAT_PNG: "image/png", +} +_FRONT_COVER = 3 # APIC/Picture "type" value for the front cover + + +class MutagenCoverExtractor: + """Implements :class:`app.domain.ports.CoverArtExtractor`.""" + + async def extract(self, path: Path) -> CoverArt | None: + try: + return await anyio.to_thread.run_sync(self._extract_sync, path) + except Exception: + log.warning("cover_extract_failed", path=str(path)) + return None + + def _extract_sync(self, path: Path) -> CoverArt | None: + audio: Any = MutagenFile(str(path)) + if audio is None: + return None + + # FLAC / OGG-FLAC: typed picture blocks on the file object. + pictures = getattr(audio, "pictures", None) + if pictures: + cover = _from_picture(_front_or_first(pictures)) + if cover is not None: + return cover + + tags = audio.tags + if tags is None: + return None + + # MP3 / anything with ID3 frames: APIC frames keyed as "APIC:...". + apics = [frame for frame in tags.values() if frame.__class__.__name__ == "APIC"] + if apics: + cover = _from_picture(_front_or_first(apics)) + if cover is not None: + return cover + + get = getattr(tags, "get", None) + if get is None: + return None + + # MP4 / M4A: "covr" atom holds a list of MP4Cover (a bytes subclass). + covr = get("covr") + if covr: + mp4_cover = covr[0] + content_type = _MP4_FORMATS.get(getattr(mp4_cover, "imageformat", -1), "image/jpeg") + return CoverArt(data=bytes(mp4_cover), content_type=content_type) + + # OGG Vorbis: base64 picture block in METADATA_BLOCK_PICTURE. + block = get("metadata_block_picture") + if block: + cover = _from_picture(_decode_vorbis_picture(block[0])) + if cover is not None: + return cover + + return None + + +def _from_picture(picture: Any) -> CoverArt | None: + """Build a :class:`CoverArt` from a mutagen picture/APIC frame, or ``None``.""" + if picture is None: + return None + data = getattr(picture, "data", None) + if not data: + return None + mime = getattr(picture, "mime", None) or "image/jpeg" + return CoverArt(data=bytes(data), content_type=str(mime)) + + +def _front_or_first(pictures: list[Any]) -> Any: + """Prefer the front-cover picture (type 3), else the first available.""" + for pic in pictures: + if getattr(pic, "type", None) == _FRONT_COVER: + return pic + return pictures[0] if pictures else None + + +def _decode_vorbis_picture(encoded: str) -> Any: + try: + return Picture(base64.b64decode(encoded)) # type: ignore[no-untyped-call] + except Exception: + return None diff --git a/app/infrastructure/metadata/coverart.py b/app/infrastructure/metadata/coverart.py new file mode 100644 index 0000000..a55ab42 --- /dev/null +++ b/app/infrastructure/metadata/coverart.py @@ -0,0 +1,83 @@ +"""CoverArtArchiveClient — fetches front cover art from the Cover Art Archive. + +The network fallback when a file carries no embedded artwork: given a +MusicBrainz **release-group** id (supplied by the AcoustID lookup), request the +front image from ``coverartarchive.org``. The CAA redirects to the Internet +Archive, so redirects are followed. ``thumbnail`` 500px keeps payloads small. + +Graceful degradation (CLAUDE.md): no release-group id → never called; any +network/HTTP error (incl. 404 "no cover") → returns ``None``, never raises. A +small inter-call delay respects the shared MusicBrainz/CAA infrastructure. +""" + +import asyncio +import time + +import httpx + +from app.core.logging import get_logger +from app.domain.entities.cover import CoverArt + +log = get_logger(__name__) + +_DEFAULT_BASE_URL = "https://coverartarchive.org" +_TIMEOUT_SECONDS = 15.0 +_MIN_INTERVAL_SECONDS = 1.0 # CAA piggybacks on MusicBrainz infra; stay polite +_MAX_BYTES = 10 * 1024 * 1024 # ignore absurdly large images + + +class CoverArtArchiveClient: + """Implements :class:`app.domain.ports.CoverArtProvider`.""" + + _throttle_lock = asyncio.Lock() + _last_call_monotonic = 0.0 + + def __init__( + self, + *, + user_agent: str, + enabled: bool = True, + base_url: str = _DEFAULT_BASE_URL, + ) -> None: + self._user_agent = user_agent + self._enabled = enabled + self._base_url = base_url.rstrip("/") + + def is_available(self) -> bool: + return self._enabled + + async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None: + if not self._enabled or not release_group_mbid: + return None + url = f"{self._base_url}/release-group/{release_group_mbid}/front-500" + try: + await self._throttle() + async with httpx.AsyncClient( + timeout=_TIMEOUT_SECONDS, + follow_redirects=True, + headers={"User-Agent": self._user_agent}, + ) as client: + resp = await client.get(url) + if resp.status_code == 404: + return None # no cover for this release group — normal, not an error + resp.raise_for_status() + except httpx.HTTPError: + log.warning("coverart_fetch_failed", release_group=release_group_mbid) + return None + + data = resp.content + if not data or len(data) > _MAX_BYTES: + return None + content_type = resp.headers.get("content-type", "image/jpeg").split(";")[0].strip() + if not content_type.startswith("image/"): + return None + return CoverArt(data=data, content_type=content_type) + + @classmethod + async def _throttle(cls) -> None: + async with cls._throttle_lock: + elapsed = time.monotonic() - cls._last_call_monotonic + wait = _MIN_INTERVAL_SECONDS - elapsed + if wait > 0: + await asyncio.sleep(wait) + cls._last_call_monotonic = time.monotonic() diff --git a/app/infrastructure/metadata/fingerprint.py b/app/infrastructure/metadata/fingerprint.py index f901b69..024424a 100644 --- a/app/infrastructure/metadata/fingerprint.py +++ b/app/infrastructure/metadata/fingerprint.py @@ -41,7 +41,7 @@ class FpcalcFingerprinter: ) async with asyncio.timeout(_TIMEOUT_SECONDS): stdout, _stderr = await proc.communicate() - except (TimeoutError, OSError): + except TimeoutError, OSError: log.warning("fpcalc_failed", path=str(path)) return None @@ -53,7 +53,7 @@ class FpcalcFingerprinter: data = json.loads(stdout) fingerprint = str(data["fingerprint"]) duration = round(float(data["duration"])) - except (json.JSONDecodeError, KeyError, ValueError): + except json.JSONDecodeError, KeyError, ValueError: log.warning("fpcalc_bad_output", path=str(path)) return None diff --git a/app/infrastructure/storage/s3.py b/app/infrastructure/storage/s3.py index 0cc23cc..19ebc0a 100644 --- a/app/infrastructure/storage/s3.py +++ b/app/infrastructure/storage/s3.py @@ -84,9 +84,7 @@ class S3FileStorage: async def _stream() -> AsyncGenerator[bytes]: async with self._client() as s3: try: - resp = await s3.get_object( - Bucket=_bucket, Key=_key, Range=range_header - ) + resp = await s3.get_object(Bucket=_bucket, Key=_key, Range=range_header) except ClientError as exc: raise StorageError(str(exc)) from exc body = resp["Body"] diff --git a/app/workers/tasks/enrich_task.py b/app/workers/tasks/enrich_task.py index 492c322..ca8ff98 100644 --- a/app/workers/tasks/enrich_task.py +++ b/app/workers/tasks/enrich_task.py @@ -19,6 +19,8 @@ from app.infrastructure.db.repositories import ( SqlAlchemyTrackRepository, ) from app.infrastructure.metadata.acoustid import AcoustIdHttpClient +from app.infrastructure.metadata.cover_extractor import MutagenCoverExtractor +from app.infrastructure.metadata.coverart import CoverArtArchiveClient from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter from app.infrastructure.metadata.tags import MutagenTagReader from app.infrastructure.storage.provider import get_file_storage @@ -28,14 +30,17 @@ log = get_logger("worker.enrich") async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any]: settings = get_settings() - api_key = ( - settings.acoustid_api_key.get_secret_value() if settings.acoustid_api_key else None - ) + api_key = settings.acoustid_api_key.get_secret_value() if settings.acoustid_api_key else None acoustid = AcoustIdHttpClient( api_key=api_key, user_agent=settings.musicbrainz_user_agent, api_url=settings.acoustid_api_url, ) + cover_provider = CoverArtArchiveClient( + user_agent=settings.musicbrainz_user_agent, + enabled=settings.coverart_enabled, + base_url=settings.coverart_base_url, + ) async with session_scope() as session: service = MetadataEnrichmentService( @@ -46,6 +51,8 @@ async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any] tag_reader=MutagenTagReader(), fingerprinter=FpcalcFingerprinter(settings.fpcalc_path), acoustid=acoustid, + cover_extractor=MutagenCoverExtractor(), + cover_provider=cover_provider, ) result = await service.enrich(uuid.UUID(track_id)) diff --git a/tests/test_acoustid_parse.py b/tests/test_acoustid_parse.py index 9d1c929..717a74c 100644 --- a/tests/test_acoustid_parse.py +++ b/tests/test_acoustid_parse.py @@ -33,6 +33,7 @@ def test_parses_full_recording() -> None: assert match.title == "One More Time" assert match.artist == "Daft Punk" assert match.album == "Discovery" + assert match.release_group_mbid == "rg1" assert match.score == 0.97 diff --git a/tests/test_cover_api.py b/tests/test_cover_api.py new file mode 100644 index 0000000..04e7dd2 --- /dev/null +++ b/tests/test_cover_api.py @@ -0,0 +1,194 @@ +"""Integration tests for the native cover-art endpoints. + +Seeds an album with a stored cover, then exercises the ``/api/v1`` album/track +cover endpoints (token auth, 404 when absent). Requires a reachable Postgres; +skips otherwise. +""" + +import asyncio +import os +import uuid +from collections.abc import AsyncIterator +from pathlib import Path + +import pytest +from app.core.config import get_settings +from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope +from app.infrastructure.db.repositories import ( + SqlAlchemyAlbumRepository, + SqlAlchemyArtistRepository, + SqlAlchemyRefreshTokenRepository, + SqlAlchemyTrackRepository, + SqlAlchemyUserRepository, +) +from app.infrastructure.storage.provider import get_file_storage +from asgi_lifespan import LifespanManager +from httpx import ASGITransport, AsyncClient + +pytestmark = pytest.mark.asyncio + +# A minimal valid 1x1 PNG. +_PNG_BYTES = bytes.fromhex( + "89504e470d0a1a0a0000000d4948445200000001000000010802000000907753" + "de0000000c4944415408d763f8cfc0f01f0005000155a2b4f60000000049454e44ae426082" +) + +_db_reachable_cache: bool | None = None + + +async def _db_reachable() -> bool: + global _db_reachable_cache + if _db_reachable_cache is not None: + return _db_reachable_cache + from sqlalchemy import text + + try: + async with asyncio.timeout(3): + async with get_engine().connect() as conn: + await conn.execute(text("SELECT 1")) + _db_reachable_cache = True + except Exception: + _db_reachable_cache = False + return _db_reachable_cache + + +async def _seed_album_with_cover(*, with_cover: bool) -> tuple[uuid.UUID, uuid.UUID]: + """Create an artist + album (+ optional cover file) + track. Returns + ``(album_id, track_id)``.""" + async with session_scope() as session: + artist = await SqlAlchemyArtistRepository(session).get_or_create("Coverart Artist") + album = await SqlAlchemyAlbumRepository(session).get_or_create( + title="Coverart Album", artist_id=artist.id, year=2020, musicbrainz_id=None + ) + track = await SqlAlchemyTrackRepository(session).add( + id=uuid.uuid4(), + title="Coverart Track", + artist_id=artist.id, + storage_uri="tracks/zz/cover-track.mp3", + file_format="mp3", + file_size=10, + source="upload", + source_id="cover-src", + metadata_status="enriched", + added_by=None, + ) + # Link the track to the album (add() doesn't take album_id). + await SqlAlchemyTrackRepository(session).apply_enrichment( + track.id, + title="Coverart Track", + artist_id=artist.id, + album_id=album.id, + genre=None, + year=2020, + track_number=1, + duration_seconds=1, + bitrate=None, + acoustid_fingerprint=None, + musicbrainz_id=None, + metadata_status="enriched", + ) + if with_cover: + key = f"covers/{album.id}.png" + import tempfile + + with tempfile.NamedTemporaryFile(suffix=".png") as tmp: + tmp.write(_PNG_BYTES) + tmp.flush() + await get_file_storage().save_file(key, Path(tmp.name)) + await SqlAlchemyAlbumRepository(session).set_cover_path(album.id, key) + return album.id, track.id + + +@pytest.fixture +async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]: + if not await _db_reachable(): + pytest.skip("Postgres not reachable — integration test skipped.") + + os.environ["MEDIA_PATH"] = str(tmp_path) + get_settings.cache_clear() + + import app.infrastructure.storage.provider as _storage_provider + + _storage_provider._storage = None + + try: + async with get_engine().begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + await conn.run_sync(Base.metadata.create_all) + + from app.application.user_service import UserService + from app.core.security import Argon2PasswordHasher + + async with session_scope() as session: + await UserService( + users=SqlAlchemyUserRepository(session), + refresh_tokens=SqlAlchemyRefreshTokenRepository(session), + hasher=Argon2PasswordHasher(), + ).create_user(username="testuser", password="testpass1", is_superuser=False) + + from app.main import create_app + + app = create_app() + async with LifespanManager(app): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + async with get_engine().begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + await dispose_engine() + finally: + _storage_provider._storage = None + os.environ.pop("MEDIA_PATH", None) + get_settings.cache_clear() + + +async def _login(api: AsyncClient) -> str: + resp = await api.post( + "/api/v1/auth/login", json={"username": "testuser", "password": "testpass1"} + ) + assert resp.status_code == 200 + return str(resp.json()["access_token"]) + + +async def test_album_cover_served(api: AsyncClient) -> None: + token = await _login(api) + album_id, _ = await _seed_album_with_cover(with_cover=True) + + resp = await api.get(f"/api/v1/albums/{album_id}/cover?token={token}") + assert resp.status_code == 200, resp.text + assert resp.headers["content-type"] == "image/png" + assert resp.content == _PNG_BYTES + + +async def test_track_cover_served_from_album(api: AsyncClient) -> None: + token = await _login(api) + _, track_id = await _seed_album_with_cover(with_cover=True) + + resp = await api.get(f"/api/v1/tracks/{track_id}/cover?token={token}") + assert resp.status_code == 200, resp.text + assert resp.headers["content-type"] == "image/png" + assert resp.content == _PNG_BYTES + + +async def test_album_without_cover_is_404(api: AsyncClient) -> None: + token = await _login(api) + album_id, _ = await _seed_album_with_cover(with_cover=False) + + resp = await api.get(f"/api/v1/albums/{album_id}/cover?token={token}") + assert resp.status_code == 404 + + +async def test_cover_requires_auth(api: AsyncClient) -> None: + album_id, _ = await _seed_album_with_cover(with_cover=True) + resp = await api.get(f"/api/v1/albums/{album_id}/cover") + assert resp.status_code == 401 + + +async def test_album_appears_with_has_cover_flag(api: AsyncClient) -> None: + token = await _login(api) + album_id, _ = await _seed_album_with_cover(with_cover=True) + + resp = await api.get(f"/api/v1/albums/{album_id}", headers={"Authorization": f"Bearer {token}"}) + assert resp.status_code == 200 + assert resp.json()["has_cover"] is True diff --git a/tests/test_metadata_service.py b/tests/test_metadata_service.py index ef302f7..b49611a 100644 --- a/tests/test_metadata_service.py +++ b/tests/test_metadata_service.py @@ -15,6 +15,7 @@ import pytest from app.application.metadata_service import MetadataEnrichmentService from app.domain.entities import Artist, Track from app.domain.entities.album import Album +from app.domain.entities.cover import CoverArt from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch pytestmark = pytest.mark.asyncio @@ -67,8 +68,10 @@ class FakeArtistRepo: class FakeAlbumRepo: - def __init__(self) -> None: + def __init__(self, *, cover_path: str | None = None) -> None: self.created: list[tuple[str, uuid.UUID]] = [] + self.covers: dict[uuid.UUID, str] = {} + self._existing_cover = cover_path async def get_or_create( self, *, title: str, artist_id: uuid.UUID, year: int | None, musicbrainz_id: str | None @@ -80,18 +83,52 @@ class FakeAlbumRepo: title=title, artist_id=artist_id, year=year, - cover_path=None, + cover_path=self._existing_cover, musicbrainz_id=musicbrainz_id, created_at=now, updated_at=now, ) + async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: + self.covers[album_id] = cover_path + class FakeStorage: + def __init__(self) -> None: + self.saved: list[str] = [] + @asynccontextmanager async def as_local_path(self, key: str) -> AsyncIterator[Path]: yield Path("/tmp") / key + async def save_file(self, key: str, src_path: Path) -> int: + self.saved.append(key) + return 1 + + +class FakeCoverExtractor: + def __init__(self, cover: CoverArt | None) -> None: + self._cover = cover + self.calls = 0 + + async def extract(self, path: Path) -> CoverArt | None: + self.calls += 1 + return self._cover + + +class FakeCoverProvider: + def __init__(self, cover: CoverArt | None, *, available: bool = True) -> None: + self._cover = cover + self._available = available + self.calls = 0 + + def is_available(self) -> bool: + return self._available + + async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None: + self.calls += 1 + return self._cover + class FakeTagReader: def __init__(self, tags: AudioTags | None) -> None: @@ -281,3 +318,114 @@ async def test_fingerprint_skipped_when_acoustid_unavailable() -> None: # tags still enrich, but no AcoustID call is attempted assert acoustid.calls == 0 assert result.status == "enriched" + + +# -- cover-art resolution ----------------------------------------------------- +_PNG = CoverArt(data=b"\x89PNG\r\n", content_type="image/png") +_JPG = CoverArt(data=b"\xff\xd8\xff", content_type="image/jpeg") + + +def _cover_service( + *, + track: Track, + tags: AudioTags | None = None, + match: RecordingMatch | None = None, + fp: Fingerprint | None = None, + extractor: FakeCoverExtractor | None = None, + provider: FakeCoverProvider | None = None, + existing_cover: str | None = None, +) -> tuple[MetadataEnrichmentService, FakeAlbumRepo, FakeStorage]: + albums = FakeAlbumRepo(cover_path=existing_cover) + storage = FakeStorage() + service = MetadataEnrichmentService( + tracks=FakeTrackRepo(track), # type: ignore[arg-type] + artists=FakeArtistRepo(), # type: ignore[arg-type] + albums=albums, # type: ignore[arg-type] + storage=storage, # type: ignore[arg-type] + tag_reader=FakeTagReader(tags), # type: ignore[arg-type] + fingerprinter=FakeFingerprinter(fp), # type: ignore[arg-type] + acoustid=FakeAcoustId(match), # type: ignore[arg-type] + cover_extractor=extractor, # type: ignore[arg-type] + cover_provider=provider, # type: ignore[arg-type] + ) + return service, albums, storage + + +async def test_cover_extracted_from_embedded_art() -> None: + track = _track() + extractor = FakeCoverExtractor(_PNG) + provider = FakeCoverProvider(_JPG) + service, albums, storage = _cover_service( + track=track, tags=AudioTags(album="The Wall", artist="PF"), + extractor=extractor, provider=provider, + ) + + await service.enrich(track.id) + + assert extractor.calls == 1 + assert provider.calls == 0 # embedded art wins → no network fetch + assert len(albums.covers) == 1 + key = next(iter(albums.covers.values())) + assert key.startswith("covers/") and key.endswith(".png") + assert storage.saved == [key] + + +async def test_cover_falls_back_to_archive() -> None: + track = _track() + extractor = FakeCoverExtractor(None) # no embedded art + provider = FakeCoverProvider(_JPG) + match = RecordingMatch(acoustid="ac", score=1.0, release_group_mbid="rg-123", album="The Wall") + fp = Fingerprint(fingerprint="AQAA", duration_seconds=200) + service, albums, storage = _cover_service( + track=track, tags=AudioTags(album="The Wall", artist="PF"), + match=match, fp=fp, extractor=extractor, provider=provider, + ) + + await service.enrich(track.id) + + assert extractor.calls == 1 + assert provider.calls == 1 + key = next(iter(albums.covers.values())) + assert key.endswith(".jpg") + assert storage.saved == [key] + + +async def test_cover_not_fetched_without_release_group() -> None: + track = _track() + provider = FakeCoverProvider(_JPG) + service, albums, _ = _cover_service( + track=track, tags=AudioTags(album="The Wall", artist="PF"), + extractor=FakeCoverExtractor(None), provider=provider, + ) + + await service.enrich(track.id) + + assert provider.calls == 0 # no release-group mbid → nothing to look up + assert albums.covers == {} + + +async def test_existing_cover_is_not_overwritten() -> None: + track = _track() + extractor = FakeCoverExtractor(_PNG) + service, albums, storage = _cover_service( + track=track, tags=AudioTags(album="The Wall", artist="PF"), + extractor=extractor, existing_cover="covers/old.jpg", + ) + + await service.enrich(track.id) + + assert extractor.calls == 0 # album already has a cover → skip entirely + assert albums.covers == {} + assert storage.saved == [] + + +async def test_cover_skipped_when_no_album() -> None: + track = _track() + extractor = FakeCoverExtractor(_PNG) + # no album tag and no match → no album resolved → no cover work + service, _albums, storage = _cover_service(track=track, extractor=extractor) + + await service.enrich(track.id) + + assert extractor.calls == 0 + assert storage.saved == [] diff --git a/tests/test_upload_stream_api.py b/tests/test_upload_stream_api.py index 2d28b70..f87f88c 100644 --- a/tests/test_upload_stream_api.py +++ b/tests/test_upload_stream_api.py @@ -175,9 +175,7 @@ async def test_stream_range(api: AsyncClient) -> None: async def test_stream_not_found(api: AsyncClient) -> None: token = await _login(api) - resp = await api.get( - f"/api/v1/stream/00000000-0000-0000-0000-000000000000?token={token}" - ) + resp = await api.get(f"/api/v1/stream/00000000-0000-0000-0000-000000000000?token={token}") assert resp.status_code == 404