feat: cover-art pipeline (§1D)
Docker Build & Publish / build (push) Has been cancelled
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled

Resolve, store and serve album cover art.

Sources (tag-first, mirroring enrichment): embedded artwork extracted
offline via mutagen (ID3 APIC / FLAC+OGG Picture / MP4 covr), then Cover
Art Archive by release-group MBID as a network fallback. Resolution runs
inside MetadataEnrichmentService after album resolution, only when the
album has no cover yet (idempotent, never overwrites), and is best-effort
so a cover failure never affects enrichment status.

- CoverArt value object + CoverArtExtractor/CoverArtProvider ports
- MutagenCoverExtractor + CoverArtArchiveClient adapters
- AcoustID parser now captures release_group_mbid
- Covers stored via FileStorage at covers/{album_id}.{ext} (local + S3)
- AlbumRepository.set_cover_path
- Serve real covers: GET /api/v1/albums|tracks/{id}/cover (StreamUser,
  ?token=), Subsonic getCoverArt (placeholder fallback)
- has_cover flag on AlbumOut/TrackOut
- coverart_enabled / coverart_base_url settings
- tests: cover resolution units + release_group parse + DB-backed
  test_cover_api.py (139 green via make test-api)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-13 12:10:05 +03:00
parent c7e078d758
commit 0bb752f582
23 changed files with 834 additions and 30 deletions
+57
View File
@@ -0,0 +1,57 @@
"""Shared cover-art serving helper (presentation).
Streams a stored cover image from the :class:`FileStorage` port. Used by the
native ``/api/v1`` cover endpoints and the Subsonic ``getCoverArt`` adapter so
the streaming/content-type logic lives in one place.
"""
import uuid
from fastapi.responses import StreamingResponse
from app.domain.entities.album import Album
from app.domain.errors import NotFoundError, StorageError
from app.domain.ports import AlbumRepository, FileStorage, TrackRepository
_CONTENT_TYPE_BY_EXT: dict[str, str] = {
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"png": "image/png",
"webp": "image/webp",
"gif": "image/gif",
}
# Covers are immutable for a given album (a new cover means a new key), so let
# clients cache aggressively.
_CACHE_CONTROL = "public, max-age=86400"
def _content_type_for(key: str) -> str:
ext = key.rsplit(".", 1)[-1].lower() if "." in key else ""
return _CONTENT_TYPE_BY_EXT.get(ext, "application/octet-stream")
async def stream_cover(storage: FileStorage, cover_path: str) -> StreamingResponse:
"""Stream a stored cover by its storage key. Raises ``NotFoundError`` if the
object is missing (a dangling ``cover_path`` reads as "no cover")."""
try:
stream, total = await storage.open_range(cover_path, 0, None)
except StorageError as exc:
raise NotFoundError("Cover not found.") from exc
return StreamingResponse(
stream,
media_type=_content_type_for(cover_path),
headers={"Content-Length": str(total), "Cache-Control": _CACHE_CONTROL},
)
async def resolve_album_for_track(
track_repo: TrackRepository,
album_repo: AlbumRepository,
track_id: uuid.UUID,
) -> Album | None:
"""The album that owns a track (cover lives on the album), or ``None``."""
track = await track_repo.get_by_id(track_id)
if track is None or track.album_id is None:
return None
return await album_repo.get_by_id(track.album_id)
+32 -6
View File
@@ -13,9 +13,17 @@ from typing import Annotated
from fastapi import APIRouter, Header, Query from fastapi import APIRouter, Header, Query
from fastapi.responses import Response, StreamingResponse from fastapi.responses import Response, StreamingResponse
from app.api.deps import StreamingServiceDep, SubsonicUser, TrackRepoDep from app.api.covers import resolve_album_for_track, stream_cover
from app.api.rest.ids import decode_track, parse from app.api.deps import (
from app.domain.errors import NotFoundError AlbumRepoDep,
FileStorageDep,
StreamingServiceDep,
SubsonicUser,
TrackRepoDep,
)
from app.api.rest.ids import IdKind, decode_track, parse
from app.domain.entities.album import Album
from app.domain.errors import NotFoundError, StorageError
router = APIRouter() router = APIRouter()
@@ -69,10 +77,28 @@ async def download(
@router.api_route("/getCoverArt.view", methods=["GET", "POST"]) @router.api_route("/getCoverArt.view", methods=["GET", "POST"])
async def get_cover_art( async def get_cover_art(
_user: SubsonicUser, _user: SubsonicUser,
album_repo: AlbumRepoDep,
track_repo: TrackRepoDep,
storage: FileStorageDep,
id: Annotated[str, Query()], id: Annotated[str, Query()],
size: Annotated[int | None, Query()] = None, size: Annotated[int | None, Query()] = None,
) -> Response: ) -> Response:
# Validate the id shape so clients get a clean error on garbage, then serve a # Cover ids reuse the entity id: ``al-<uuid>`` (album) or ``tr-<uuid>``
# placeholder. TODO: stream real covers once the cover pipeline exists. # (track → its album). Unlike the native API, Subsonic clients expect an
parse(id) # image either way, so a missing cover falls back to a placeholder rather
# than 404. ``size`` is accepted but ignored (we serve the stored image).
kind, value = parse(id)
album: Album | None
if kind is IdKind.ALBUM:
album = await album_repo.get_by_id(value)
elif kind is IdKind.TRACK:
album = await resolve_album_for_track(track_repo, album_repo, value)
else:
album = None
if album is not None and album.cover_path:
try:
return await stream_cover(storage, album.cover_path)
except NotFoundError, StorageError:
pass
return Response(content=_PLACEHOLDER_PNG, media_type="image/png") return Response(content=_PLACEHOLDER_PNG, media_type="image/png")
+1
View File
@@ -13,4 +13,5 @@ class AlbumOut(BaseModel):
artist_name: str artist_name: str
year: int | None year: int | None
track_count: int track_count: int
has_cover: bool
created_at: dt.datetime created_at: dt.datetime
+1
View File
@@ -18,6 +18,7 @@ class TrackOut(BaseModel):
file_size: int file_size: int
metadata_status: str metadata_status: str
source: str source: str
has_cover: bool
created_at: dt.datetime created_at: dt.datetime
+22 -3
View File
@@ -1,11 +1,19 @@
"""Album endpoints.""" """Album endpoints."""
import uuid import uuid
from typing import Any
from fastapi import APIRouter, Query from fastapi import APIRouter, Query
from fastapi.responses import StreamingResponse
from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, TrackRepoDep from app.api.covers import stream_cover
from app.api.deps import (
AlbumRepoDep,
ArtistRepoDep,
CurrentUser,
FileStorageDep,
StreamUser,
TrackRepoDep,
)
from app.api.schemas.album import AlbumOut from app.api.schemas.album import AlbumOut
from app.api.schemas.pagination import PagedResponse from app.api.schemas.pagination import PagedResponse
from app.api.schemas.track import TrackOut from app.api.schemas.track import TrackOut
@@ -30,6 +38,7 @@ async def _build_album_out(
artist_name=artists[a.artist_id].name if a.artist_id in artists else "Unknown Artist", artist_name=artists[a.artist_id].name if a.artist_id in artists else "Unknown Artist",
year=a.year, year=a.year,
track_count=track_counts.get(a.id, 0), track_count=track_counts.get(a.id, 0),
has_cover=bool(a.cover_path),
created_at=a.created_at, created_at=a.created_at,
) )
for a in albums for a in albums
@@ -109,4 +118,14 @@ async def get_album_tracks(
@router.get("/{album_id}/cover") @router.get("/{album_id}/cover")
async def get_album_cover(album_id: uuid.UUID, _: CurrentUser) -> Any: ... async def get_album_cover(
album_id: uuid.UUID,
album_repo: AlbumRepoDep,
storage: FileStorageDep,
_: StreamUser,
) -> StreamingResponse:
# ``<img>`` can't send a bearer header → StreamUser accepts ``?token=``.
album = await album_repo.get_by_id(album_id)
if album is None or not album.cover_path:
raise NotFoundError("Cover not found.")
return await stream_cover(storage, album.cover_path)
+24 -2
View File
@@ -4,8 +4,17 @@ import uuid
from typing import Any from typing import Any
from fastapi import APIRouter, Query, Response from fastapi import APIRouter, Query, Response
from fastapi.responses import StreamingResponse
from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, FileStorageDep, TrackRepoDep from app.api.covers import resolve_album_for_track, stream_cover
from app.api.deps import (
AlbumRepoDep,
ArtistRepoDep,
CurrentUser,
FileStorageDep,
StreamUser,
TrackRepoDep,
)
from app.api.schemas.pagination import PagedResponse from app.api.schemas.pagination import PagedResponse
from app.api.schemas.track import TrackOut, TrackUpdate from app.api.schemas.track import TrackOut, TrackUpdate
from app.domain.entities.album import Album from app.domain.entities.album import Album
@@ -34,6 +43,7 @@ async def _build_track_out(
file_size=t.file_size, file_size=t.file_size,
metadata_status=t.metadata_status, metadata_status=t.metadata_status,
source=t.source, source=t.source,
has_cover=bool(t.album_id and albums.get(t.album_id) and albums[t.album_id].cover_path),
created_at=t.created_at, created_at=t.created_at,
) )
for t in tracks for t in tracks
@@ -144,7 +154,19 @@ async def optimize_track(track_id: uuid.UUID, _: CurrentUser) -> Any: ...
@router.get("/{track_id}/cover") @router.get("/{track_id}/cover")
async def get_track_cover(track_id: uuid.UUID, _: CurrentUser) -> Any: ... async def get_track_cover(
track_id: uuid.UUID,
track_repo: TrackRepoDep,
album_repo: AlbumRepoDep,
storage: FileStorageDep,
_: StreamUser,
) -> StreamingResponse:
# A track's cover is its album's cover. ``<img>`` can't send a bearer
# header → StreamUser accepts ``?token=``.
album = await resolve_album_for_track(track_repo, album_repo, track_id)
if album is None or not album.cover_path:
raise NotFoundError("Cover not found.")
return await stream_cover(storage, album.cover_path)
@router.post("/{track_id}/metadata/enrich") @router.post("/{track_id}/metadata/enrich")
+77 -4
View File
@@ -12,10 +12,14 @@ Invariants (plan §6.2, CLAUDE.md):
- **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases. - **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases.
""" """
import tempfile
import uuid import uuid
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path
from app.core.logging import get_logger from app.core.logging import get_logger
from app.domain.entities.album import Album
from app.domain.entities.cover import CoverArt
from app.domain.entities.metadata import AudioTags, RecordingMatch from app.domain.entities.metadata import AudioTags, RecordingMatch
from app.domain.ports import ( from app.domain.ports import (
AcoustIdClient, AcoustIdClient,
@@ -23,6 +27,8 @@ from app.domain.ports import (
ArtistRepository, ArtistRepository,
AudioFingerprinter, AudioFingerprinter,
AudioTagReader, AudioTagReader,
CoverArtExtractor,
CoverArtProvider,
FileStorage, FileStorage,
TrackRepository, TrackRepository,
) )
@@ -50,6 +56,8 @@ class MetadataEnrichmentService:
tag_reader: AudioTagReader, tag_reader: AudioTagReader,
fingerprinter: AudioFingerprinter, fingerprinter: AudioFingerprinter,
acoustid: AcoustIdClient, acoustid: AcoustIdClient,
cover_extractor: CoverArtExtractor | None = None,
cover_provider: CoverArtProvider | None = None,
) -> None: ) -> None:
self._tracks = tracks self._tracks = tracks
self._artists = artists self._artists = artists
@@ -58,6 +66,8 @@ class MetadataEnrichmentService:
self._tag_reader = tag_reader self._tag_reader = tag_reader
self._fingerprinter = fingerprinter self._fingerprinter = fingerprinter
self._acoustid = acoustid self._acoustid = acoustid
self._cover_extractor = cover_extractor
self._cover_provider = cover_provider
async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult: async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult:
track = await self._tracks.get_by_id(track_id) track = await self._tracks.get_by_id(track_id)
@@ -92,7 +102,15 @@ class MetadataEnrichmentService:
acoustid_id = match.acoustid if match else None acoustid_id = match.acoustid if match else None
artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id) artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id)
album_id = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid) album = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid)
album_id = album.id if album is not None else None
if album is not None:
await self._resolve_cover(
album,
storage_uri=track.storage_uri,
release_group_mbid=match.release_group_mbid if match else None,
)
identified = bool(artist_name) or album_id is not None or mbid is not None identified = bool(artist_name) or album_id is not None or mbid is not None
status = "enriched" if identified else "failed" status = "enriched" if identified else "failed"
@@ -148,16 +166,71 @@ class MetadataEnrichmentService:
artist_id: uuid.UUID, artist_id: uuid.UUID,
year: int | None, year: int | None,
mbid: str | None, mbid: str | None,
) -> uuid.UUID | None: ) -> Album | None:
if not title: if not title:
return None return None
album = await self._albums.get_or_create( return await self._albums.get_or_create(
title=title, title=title,
artist_id=artist_id, artist_id=artist_id,
year=year, year=year,
musicbrainz_id=mbid, musicbrainz_id=mbid,
) )
return album.id
async def _resolve_cover(
self,
album: Album,
*,
storage_uri: str,
release_group_mbid: str | None,
) -> None:
"""Fill in an album cover when it has none. Source order mirrors the
tag-first pipeline: embedded artwork (offline) → Cover Art Archive
(network, by release-group). Best-effort — any failure is swallowed so a
missing cover never affects enrichment status."""
if album.cover_path:
return # already has one — never overwrite (idempotent)
cover = await self._extract_cover(storage_uri)
if cover is None:
cover = await self._fetch_cover(release_group_mbid)
if cover is None:
return
try:
key = await self._save_cover(album.id, cover)
await self._albums.set_cover_path(album.id, key)
log.info("cover_resolved", album_id=str(album.id), content_type=cover.content_type)
except Exception:
log.warning("cover_save_failed", album_id=str(album.id))
async def _extract_cover(self, storage_uri: str) -> CoverArt | None:
if self._cover_extractor is None:
return None
try:
async with self._storage.as_local_path(storage_uri) as path:
return await self._cover_extractor.extract(path)
except Exception:
log.warning("cover_extract_step_failed", storage_uri=storage_uri)
return None
async def _fetch_cover(self, release_group_mbid: str | None) -> CoverArt | None:
if self._cover_provider is None or not release_group_mbid:
return None
if not self._cover_provider.is_available():
return None
try:
return await self._cover_provider.fetch_release_group(release_group_mbid)
except Exception:
log.warning("cover_fetch_step_failed", release_group=release_group_mbid)
return None
async def _save_cover(self, album_id: uuid.UUID, cover: CoverArt) -> str:
key = f"covers/{album_id}.{cover.extension}"
with tempfile.NamedTemporaryFile(suffix=f".{cover.extension}") as tmp:
tmp.write(cover.data)
tmp.flush()
await self._storage.save_file(key, Path(tmp.name))
return key
def _opt_str(*values: str | None) -> str | None: def _opt_str(*values: str | None) -> str | None:
+5
View File
@@ -103,6 +103,11 @@ class Settings(BaseSettings):
# image installs it via libchromaprint-tools. # image installs it via libchromaprint-tools.
fpcalc_path: str = "fpcalc" fpcalc_path: str = "fpcalc"
# Cover Art Archive — network fallback for album covers (after embedded art).
# Disable to keep enrichment fully offline; embedded artwork still works.
coverart_enabled: bool = True
coverart_base_url: str = "https://coverartarchive.org"
@field_validator("database_url") @field_validator("database_url")
@classmethod @classmethod
def _require_async_driver(cls, v: str) -> str: def _require_async_driver(cls, v: str) -> str:
+2
View File
@@ -1,6 +1,7 @@
"""Domain entities and value objects — pure, framework-free.""" """Domain entities and value objects — pure, framework-free."""
from app.domain.entities.album import Album from app.domain.entities.album import Album
from app.domain.entities.cover import CoverArt
from app.domain.entities.history import PlayHistoryEntry from app.domain.entities.history import PlayHistoryEntry
from app.domain.entities.like import Like from app.domain.entities.like import Like
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
@@ -13,6 +14,7 @@ __all__ = [
"Album", "Album",
"Artist", "Artist",
"AudioTags", "AudioTags",
"CoverArt",
"Credentials", "Credentials",
"Fingerprint", "Fingerprint",
"Like", "Like",
+28
View File
@@ -0,0 +1,28 @@
"""Cover-art value object — raw image bytes plus their MIME type.
Crosses the domain boundary between the cover sources (embedded extractor,
Cover Art Archive) and the storage/serving layers. The bytes are the encoded
image as-is; we never decode/resize in Phase 1.
"""
from dataclasses import dataclass
@dataclass(frozen=True, slots=True)
class CoverArt:
data: bytes
content_type: str # "image/jpeg" | "image/png" | …
@property
def extension(self) -> str:
"""File extension for the content type (no leading dot)."""
return _EXT_BY_TYPE.get(self.content_type.lower(), "jpg")
_EXT_BY_TYPE: dict[str, str] = {
"image/jpeg": "jpg",
"image/jpg": "jpg",
"image/png": "png",
"image/webp": "webp",
"image/gif": "gif",
}
+1
View File
@@ -47,6 +47,7 @@ class RecordingMatch:
acoustid: str acoustid: str
score: float score: float
recording_mbid: str | None = None recording_mbid: str | None = None
release_group_mbid: str | None = None
title: str | None = None title: str | None = None
artist: str | None = None artist: str | None = None
album: str | None = None album: str | None = None
+18
View File
@@ -15,6 +15,7 @@ from typing import Protocol
from app.domain.entities import ( from app.domain.entities import (
Album, Album,
AudioTags, AudioTags,
CoverArt,
Credentials, Credentials,
Fingerprint, Fingerprint,
Like, Like,
@@ -188,6 +189,7 @@ class AlbumRepository(Protocol):
year: int | None, year: int | None,
musicbrainz_id: str | None, musicbrainz_id: str | None,
) -> Album: ... ) -> Album: ...
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: ...
async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ... async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ...
async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ... async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ...
async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ... async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ...
@@ -297,3 +299,19 @@ class AcoustIdClient(Protocol):
def is_available(self) -> bool: ... def is_available(self) -> bool: ...
async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: ... async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: ...
class CoverArtExtractor(Protocol):
"""Pulls embedded cover art out of a local audio file (offline, no network).
Returns ``None`` when the file has no picture or can't be parsed — never raises."""
async def extract(self, path: Path) -> CoverArt | None: ...
class CoverArtProvider(Protocol):
"""Fetches cover art from an external service (Cover Art Archive) by
MusicBrainz release-group id. ``is_available`` may gate it off; ``fetch``
returns ``None`` (not found / service down), never raising."""
def is_available(self) -> bool: ...
async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None: ...
@@ -63,6 +63,12 @@ class SqlAlchemyAlbumRepository:
await self._session.refresh(row) await self._session.refresh(row)
return _to_entity(row) return _to_entity(row)
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None:
row = await self._session.get(AlbumModel, album_id)
if row is not None:
row.cover_path = cover_path
await self._session.flush()
async def get_by_id(self, album_id: uuid.UUID) -> Album | None: async def get_by_id(self, album_id: uuid.UUID) -> Album | None:
row = await self._session.get(AlbumModel, album_id) row = await self._session.get(AlbumModel, album_id)
return _to_entity(row) if row is not None else None return _to_entity(row) if row is not None else None
+7 -2
View File
@@ -66,7 +66,7 @@ class AcoustIdHttpClient:
) )
resp.raise_for_status() resp.raise_for_status()
payload = resp.json() payload = resp.json()
except (httpx.HTTPError, ValueError): except httpx.HTTPError, ValueError:
log.warning("acoustid_lookup_failed") log.warning("acoustid_lookup_failed")
return None return None
@@ -100,6 +100,7 @@ def _parse_best_match(payload: object) -> RecordingMatch | None:
score = float(best.get("score", 0.0)) score = float(best.get("score", 0.0))
recording_mbid: str | None = None recording_mbid: str | None = None
release_group_mbid: str | None = None
title: str | None = None title: str | None = None
artist: str | None = None artist: str | None = None
album: str | None = None album: str | None = None
@@ -115,13 +116,17 @@ def _parse_best_match(payload: object) -> RecordingMatch | None:
artist = name if isinstance(name, str) else None artist = name if isinstance(name, str) else None
groups = rec.get("releasegroups") groups = rec.get("releasegroups")
if isinstance(groups, list) and groups and isinstance(groups[0], dict): if isinstance(groups, list) and groups and isinstance(groups[0], dict):
gtitle = groups[0].get("title") group = groups[0]
gtitle = group.get("title")
album = gtitle if isinstance(gtitle, str) else None album = gtitle if isinstance(gtitle, str) else None
gid = group.get("id")
release_group_mbid = gid if isinstance(gid, str) else None
return RecordingMatch( return RecordingMatch(
acoustid=acoustid, acoustid=acoustid,
score=score, score=score,
recording_mbid=recording_mbid, recording_mbid=recording_mbid,
release_group_mbid=release_group_mbid,
title=title, title=title,
artist=artist, artist=artist,
album=album, album=album,
@@ -0,0 +1,111 @@
"""MutagenCoverExtractor — pulls embedded cover art from a local audio file.
The offline-first cover source (mirrors the tag pre-pass): a well-tagged file
often already carries front-cover artwork (ID3 ``APIC``, FLAC/OGG picture
blocks, MP4 ``covr``). We read it without any network call. Parsing is blocking,
so it runs in a worker thread. Any failure degrades to ``None`` — never raises.
mutagen ships no type stubs, so its objects are handled as ``Any`` and accessed
defensively (``getattr``) — the format zoo doesn't fit one static shape anyway.
"""
import base64
from pathlib import Path
from typing import Any
import anyio
from mutagen import File as MutagenFile # type: ignore[attr-defined]
from mutagen.flac import Picture
from mutagen.mp4 import MP4Cover
from app.core.logging import get_logger
from app.domain.entities.cover import CoverArt
log = get_logger(__name__)
# MP4 cover format flag → MIME (mutagen exposes an int, not a content type).
_MP4_FORMATS: dict[int, str] = {
MP4Cover.FORMAT_JPEG: "image/jpeg",
MP4Cover.FORMAT_PNG: "image/png",
}
_FRONT_COVER = 3 # APIC/Picture "type" value for the front cover
class MutagenCoverExtractor:
"""Implements :class:`app.domain.ports.CoverArtExtractor`."""
async def extract(self, path: Path) -> CoverArt | None:
try:
return await anyio.to_thread.run_sync(self._extract_sync, path)
except Exception:
log.warning("cover_extract_failed", path=str(path))
return None
def _extract_sync(self, path: Path) -> CoverArt | None:
audio: Any = MutagenFile(str(path))
if audio is None:
return None
# FLAC / OGG-FLAC: typed picture blocks on the file object.
pictures = getattr(audio, "pictures", None)
if pictures:
cover = _from_picture(_front_or_first(pictures))
if cover is not None:
return cover
tags = audio.tags
if tags is None:
return None
# MP3 / anything with ID3 frames: APIC frames keyed as "APIC:...".
apics = [frame for frame in tags.values() if frame.__class__.__name__ == "APIC"]
if apics:
cover = _from_picture(_front_or_first(apics))
if cover is not None:
return cover
get = getattr(tags, "get", None)
if get is None:
return None
# MP4 / M4A: "covr" atom holds a list of MP4Cover (a bytes subclass).
covr = get("covr")
if covr:
mp4_cover = covr[0]
content_type = _MP4_FORMATS.get(getattr(mp4_cover, "imageformat", -1), "image/jpeg")
return CoverArt(data=bytes(mp4_cover), content_type=content_type)
# OGG Vorbis: base64 picture block in METADATA_BLOCK_PICTURE.
block = get("metadata_block_picture")
if block:
cover = _from_picture(_decode_vorbis_picture(block[0]))
if cover is not None:
return cover
return None
def _from_picture(picture: Any) -> CoverArt | None:
"""Build a :class:`CoverArt` from a mutagen picture/APIC frame, or ``None``."""
if picture is None:
return None
data = getattr(picture, "data", None)
if not data:
return None
mime = getattr(picture, "mime", None) or "image/jpeg"
return CoverArt(data=bytes(data), content_type=str(mime))
def _front_or_first(pictures: list[Any]) -> Any:
"""Prefer the front-cover picture (type 3), else the first available."""
for pic in pictures:
if getattr(pic, "type", None) == _FRONT_COVER:
return pic
return pictures[0] if pictures else None
def _decode_vorbis_picture(encoded: str) -> Any:
try:
return Picture(base64.b64decode(encoded)) # type: ignore[no-untyped-call]
except Exception:
return None
+83
View File
@@ -0,0 +1,83 @@
"""CoverArtArchiveClient — fetches front cover art from the Cover Art Archive.
The network fallback when a file carries no embedded artwork: given a
MusicBrainz **release-group** id (supplied by the AcoustID lookup), request the
front image from ``coverartarchive.org``. The CAA redirects to the Internet
Archive, so redirects are followed. ``thumbnail`` 500px keeps payloads small.
Graceful degradation (CLAUDE.md): no release-group id → never called; any
network/HTTP error (incl. 404 "no cover") → returns ``None``, never raises. A
small inter-call delay respects the shared MusicBrainz/CAA infrastructure.
"""
import asyncio
import time
import httpx
from app.core.logging import get_logger
from app.domain.entities.cover import CoverArt
log = get_logger(__name__)
_DEFAULT_BASE_URL = "https://coverartarchive.org"
_TIMEOUT_SECONDS = 15.0
_MIN_INTERVAL_SECONDS = 1.0 # CAA piggybacks on MusicBrainz infra; stay polite
_MAX_BYTES = 10 * 1024 * 1024 # ignore absurdly large images
class CoverArtArchiveClient:
"""Implements :class:`app.domain.ports.CoverArtProvider`."""
_throttle_lock = asyncio.Lock()
_last_call_monotonic = 0.0
def __init__(
self,
*,
user_agent: str,
enabled: bool = True,
base_url: str = _DEFAULT_BASE_URL,
) -> None:
self._user_agent = user_agent
self._enabled = enabled
self._base_url = base_url.rstrip("/")
def is_available(self) -> bool:
return self._enabled
async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None:
if not self._enabled or not release_group_mbid:
return None
url = f"{self._base_url}/release-group/{release_group_mbid}/front-500"
try:
await self._throttle()
async with httpx.AsyncClient(
timeout=_TIMEOUT_SECONDS,
follow_redirects=True,
headers={"User-Agent": self._user_agent},
) as client:
resp = await client.get(url)
if resp.status_code == 404:
return None # no cover for this release group — normal, not an error
resp.raise_for_status()
except httpx.HTTPError:
log.warning("coverart_fetch_failed", release_group=release_group_mbid)
return None
data = resp.content
if not data or len(data) > _MAX_BYTES:
return None
content_type = resp.headers.get("content-type", "image/jpeg").split(";")[0].strip()
if not content_type.startswith("image/"):
return None
return CoverArt(data=data, content_type=content_type)
@classmethod
async def _throttle(cls) -> None:
async with cls._throttle_lock:
elapsed = time.monotonic() - cls._last_call_monotonic
wait = _MIN_INTERVAL_SECONDS - elapsed
if wait > 0:
await asyncio.sleep(wait)
cls._last_call_monotonic = time.monotonic()
+2 -2
View File
@@ -41,7 +41,7 @@ class FpcalcFingerprinter:
) )
async with asyncio.timeout(_TIMEOUT_SECONDS): async with asyncio.timeout(_TIMEOUT_SECONDS):
stdout, _stderr = await proc.communicate() stdout, _stderr = await proc.communicate()
except (TimeoutError, OSError): except TimeoutError, OSError:
log.warning("fpcalc_failed", path=str(path)) log.warning("fpcalc_failed", path=str(path))
return None return None
@@ -53,7 +53,7 @@ class FpcalcFingerprinter:
data = json.loads(stdout) data = json.loads(stdout)
fingerprint = str(data["fingerprint"]) fingerprint = str(data["fingerprint"])
duration = round(float(data["duration"])) duration = round(float(data["duration"]))
except (json.JSONDecodeError, KeyError, ValueError): except json.JSONDecodeError, KeyError, ValueError:
log.warning("fpcalc_bad_output", path=str(path)) log.warning("fpcalc_bad_output", path=str(path))
return None return None
+1 -3
View File
@@ -84,9 +84,7 @@ class S3FileStorage:
async def _stream() -> AsyncGenerator[bytes]: async def _stream() -> AsyncGenerator[bytes]:
async with self._client() as s3: async with self._client() as s3:
try: try:
resp = await s3.get_object( resp = await s3.get_object(Bucket=_bucket, Key=_key, Range=range_header)
Bucket=_bucket, Key=_key, Range=range_header
)
except ClientError as exc: except ClientError as exc:
raise StorageError(str(exc)) from exc raise StorageError(str(exc)) from exc
body = resp["Body"] body = resp["Body"]
+10 -3
View File
@@ -19,6 +19,8 @@ from app.infrastructure.db.repositories import (
SqlAlchemyTrackRepository, SqlAlchemyTrackRepository,
) )
from app.infrastructure.metadata.acoustid import AcoustIdHttpClient from app.infrastructure.metadata.acoustid import AcoustIdHttpClient
from app.infrastructure.metadata.cover_extractor import MutagenCoverExtractor
from app.infrastructure.metadata.coverart import CoverArtArchiveClient
from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
from app.infrastructure.metadata.tags import MutagenTagReader from app.infrastructure.metadata.tags import MutagenTagReader
from app.infrastructure.storage.provider import get_file_storage from app.infrastructure.storage.provider import get_file_storage
@@ -28,14 +30,17 @@ log = get_logger("worker.enrich")
async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any]: async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any]:
settings = get_settings() settings = get_settings()
api_key = ( api_key = settings.acoustid_api_key.get_secret_value() if settings.acoustid_api_key else None
settings.acoustid_api_key.get_secret_value() if settings.acoustid_api_key else None
)
acoustid = AcoustIdHttpClient( acoustid = AcoustIdHttpClient(
api_key=api_key, api_key=api_key,
user_agent=settings.musicbrainz_user_agent, user_agent=settings.musicbrainz_user_agent,
api_url=settings.acoustid_api_url, api_url=settings.acoustid_api_url,
) )
cover_provider = CoverArtArchiveClient(
user_agent=settings.musicbrainz_user_agent,
enabled=settings.coverart_enabled,
base_url=settings.coverart_base_url,
)
async with session_scope() as session: async with session_scope() as session:
service = MetadataEnrichmentService( service = MetadataEnrichmentService(
@@ -46,6 +51,8 @@ async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any]
tag_reader=MutagenTagReader(), tag_reader=MutagenTagReader(),
fingerprinter=FpcalcFingerprinter(settings.fpcalc_path), fingerprinter=FpcalcFingerprinter(settings.fpcalc_path),
acoustid=acoustid, acoustid=acoustid,
cover_extractor=MutagenCoverExtractor(),
cover_provider=cover_provider,
) )
result = await service.enrich(uuid.UUID(track_id)) result = await service.enrich(uuid.UUID(track_id))
+1
View File
@@ -33,6 +33,7 @@ def test_parses_full_recording() -> None:
assert match.title == "One More Time" assert match.title == "One More Time"
assert match.artist == "Daft Punk" assert match.artist == "Daft Punk"
assert match.album == "Discovery" assert match.album == "Discovery"
assert match.release_group_mbid == "rg1"
assert match.score == 0.97 assert match.score == 0.97
+194
View File
@@ -0,0 +1,194 @@
"""Integration tests for the native cover-art endpoints.
Seeds an album with a stored cover, then exercises the ``/api/v1`` album/track
cover endpoints (token auth, 404 when absent). Requires a reachable Postgres;
skips otherwise.
"""
import asyncio
import os
import uuid
from collections.abc import AsyncIterator
from pathlib import Path
import pytest
from app.core.config import get_settings
from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope
from app.infrastructure.db.repositories import (
SqlAlchemyAlbumRepository,
SqlAlchemyArtistRepository,
SqlAlchemyRefreshTokenRepository,
SqlAlchemyTrackRepository,
SqlAlchemyUserRepository,
)
from app.infrastructure.storage.provider import get_file_storage
from asgi_lifespan import LifespanManager
from httpx import ASGITransport, AsyncClient
pytestmark = pytest.mark.asyncio
# A minimal valid 1x1 PNG.
_PNG_BYTES = bytes.fromhex(
"89504e470d0a1a0a0000000d4948445200000001000000010802000000907753"
"de0000000c4944415408d763f8cfc0f01f0005000155a2b4f60000000049454e44ae426082"
)
_db_reachable_cache: bool | None = None
async def _db_reachable() -> bool:
global _db_reachable_cache
if _db_reachable_cache is not None:
return _db_reachable_cache
from sqlalchemy import text
try:
async with asyncio.timeout(3):
async with get_engine().connect() as conn:
await conn.execute(text("SELECT 1"))
_db_reachable_cache = True
except Exception:
_db_reachable_cache = False
return _db_reachable_cache
async def _seed_album_with_cover(*, with_cover: bool) -> tuple[uuid.UUID, uuid.UUID]:
"""Create an artist + album (+ optional cover file) + track. Returns
``(album_id, track_id)``."""
async with session_scope() as session:
artist = await SqlAlchemyArtistRepository(session).get_or_create("Coverart Artist")
album = await SqlAlchemyAlbumRepository(session).get_or_create(
title="Coverart Album", artist_id=artist.id, year=2020, musicbrainz_id=None
)
track = await SqlAlchemyTrackRepository(session).add(
id=uuid.uuid4(),
title="Coverart Track",
artist_id=artist.id,
storage_uri="tracks/zz/cover-track.mp3",
file_format="mp3",
file_size=10,
source="upload",
source_id="cover-src",
metadata_status="enriched",
added_by=None,
)
# Link the track to the album (add() doesn't take album_id).
await SqlAlchemyTrackRepository(session).apply_enrichment(
track.id,
title="Coverart Track",
artist_id=artist.id,
album_id=album.id,
genre=None,
year=2020,
track_number=1,
duration_seconds=1,
bitrate=None,
acoustid_fingerprint=None,
musicbrainz_id=None,
metadata_status="enriched",
)
if with_cover:
key = f"covers/{album.id}.png"
import tempfile
with tempfile.NamedTemporaryFile(suffix=".png") as tmp:
tmp.write(_PNG_BYTES)
tmp.flush()
await get_file_storage().save_file(key, Path(tmp.name))
await SqlAlchemyAlbumRepository(session).set_cover_path(album.id, key)
return album.id, track.id
@pytest.fixture
async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]:
if not await _db_reachable():
pytest.skip("Postgres not reachable — integration test skipped.")
os.environ["MEDIA_PATH"] = str(tmp_path)
get_settings.cache_clear()
import app.infrastructure.storage.provider as _storage_provider
_storage_provider._storage = None
try:
async with get_engine().begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await conn.run_sync(Base.metadata.create_all)
from app.application.user_service import UserService
from app.core.security import Argon2PasswordHasher
async with session_scope() as session:
await UserService(
users=SqlAlchemyUserRepository(session),
refresh_tokens=SqlAlchemyRefreshTokenRepository(session),
hasher=Argon2PasswordHasher(),
).create_user(username="testuser", password="testpass1", is_superuser=False)
from app.main import create_app
app = create_app()
async with LifespanManager(app):
transport = ASGITransport(app=app)
async with AsyncClient(transport=transport, base_url="http://test") as client:
yield client
async with get_engine().begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await dispose_engine()
finally:
_storage_provider._storage = None
os.environ.pop("MEDIA_PATH", None)
get_settings.cache_clear()
async def _login(api: AsyncClient) -> str:
resp = await api.post(
"/api/v1/auth/login", json={"username": "testuser", "password": "testpass1"}
)
assert resp.status_code == 200
return str(resp.json()["access_token"])
async def test_album_cover_served(api: AsyncClient) -> None:
token = await _login(api)
album_id, _ = await _seed_album_with_cover(with_cover=True)
resp = await api.get(f"/api/v1/albums/{album_id}/cover?token={token}")
assert resp.status_code == 200, resp.text
assert resp.headers["content-type"] == "image/png"
assert resp.content == _PNG_BYTES
async def test_track_cover_served_from_album(api: AsyncClient) -> None:
token = await _login(api)
_, track_id = await _seed_album_with_cover(with_cover=True)
resp = await api.get(f"/api/v1/tracks/{track_id}/cover?token={token}")
assert resp.status_code == 200, resp.text
assert resp.headers["content-type"] == "image/png"
assert resp.content == _PNG_BYTES
async def test_album_without_cover_is_404(api: AsyncClient) -> None:
token = await _login(api)
album_id, _ = await _seed_album_with_cover(with_cover=False)
resp = await api.get(f"/api/v1/albums/{album_id}/cover?token={token}")
assert resp.status_code == 404
async def test_cover_requires_auth(api: AsyncClient) -> None:
album_id, _ = await _seed_album_with_cover(with_cover=True)
resp = await api.get(f"/api/v1/albums/{album_id}/cover")
assert resp.status_code == 401
async def test_album_appears_with_has_cover_flag(api: AsyncClient) -> None:
token = await _login(api)
album_id, _ = await _seed_album_with_cover(with_cover=True)
resp = await api.get(f"/api/v1/albums/{album_id}", headers={"Authorization": f"Bearer {token}"})
assert resp.status_code == 200
assert resp.json()["has_cover"] is True
+150 -2
View File
@@ -15,6 +15,7 @@ import pytest
from app.application.metadata_service import MetadataEnrichmentService from app.application.metadata_service import MetadataEnrichmentService
from app.domain.entities import Artist, Track from app.domain.entities import Artist, Track
from app.domain.entities.album import Album from app.domain.entities.album import Album
from app.domain.entities.cover import CoverArt
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
pytestmark = pytest.mark.asyncio pytestmark = pytest.mark.asyncio
@@ -67,8 +68,10 @@ class FakeArtistRepo:
class FakeAlbumRepo: class FakeAlbumRepo:
def __init__(self) -> None: def __init__(self, *, cover_path: str | None = None) -> None:
self.created: list[tuple[str, uuid.UUID]] = [] self.created: list[tuple[str, uuid.UUID]] = []
self.covers: dict[uuid.UUID, str] = {}
self._existing_cover = cover_path
async def get_or_create( async def get_or_create(
self, *, title: str, artist_id: uuid.UUID, year: int | None, musicbrainz_id: str | None self, *, title: str, artist_id: uuid.UUID, year: int | None, musicbrainz_id: str | None
@@ -80,18 +83,52 @@ class FakeAlbumRepo:
title=title, title=title,
artist_id=artist_id, artist_id=artist_id,
year=year, year=year,
cover_path=None, cover_path=self._existing_cover,
musicbrainz_id=musicbrainz_id, musicbrainz_id=musicbrainz_id,
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None:
self.covers[album_id] = cover_path
class FakeStorage: class FakeStorage:
def __init__(self) -> None:
self.saved: list[str] = []
@asynccontextmanager @asynccontextmanager
async def as_local_path(self, key: str) -> AsyncIterator[Path]: async def as_local_path(self, key: str) -> AsyncIterator[Path]:
yield Path("/tmp") / key yield Path("/tmp") / key
async def save_file(self, key: str, src_path: Path) -> int:
self.saved.append(key)
return 1
class FakeCoverExtractor:
def __init__(self, cover: CoverArt | None) -> None:
self._cover = cover
self.calls = 0
async def extract(self, path: Path) -> CoverArt | None:
self.calls += 1
return self._cover
class FakeCoverProvider:
def __init__(self, cover: CoverArt | None, *, available: bool = True) -> None:
self._cover = cover
self._available = available
self.calls = 0
def is_available(self) -> bool:
return self._available
async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None:
self.calls += 1
return self._cover
class FakeTagReader: class FakeTagReader:
def __init__(self, tags: AudioTags | None) -> None: def __init__(self, tags: AudioTags | None) -> None:
@@ -281,3 +318,114 @@ async def test_fingerprint_skipped_when_acoustid_unavailable() -> None:
# tags still enrich, but no AcoustID call is attempted # tags still enrich, but no AcoustID call is attempted
assert acoustid.calls == 0 assert acoustid.calls == 0
assert result.status == "enriched" assert result.status == "enriched"
# -- cover-art resolution -----------------------------------------------------
_PNG = CoverArt(data=b"\x89PNG\r\n", content_type="image/png")
_JPG = CoverArt(data=b"\xff\xd8\xff", content_type="image/jpeg")
def _cover_service(
*,
track: Track,
tags: AudioTags | None = None,
match: RecordingMatch | None = None,
fp: Fingerprint | None = None,
extractor: FakeCoverExtractor | None = None,
provider: FakeCoverProvider | None = None,
existing_cover: str | None = None,
) -> tuple[MetadataEnrichmentService, FakeAlbumRepo, FakeStorage]:
albums = FakeAlbumRepo(cover_path=existing_cover)
storage = FakeStorage()
service = MetadataEnrichmentService(
tracks=FakeTrackRepo(track), # type: ignore[arg-type]
artists=FakeArtistRepo(), # type: ignore[arg-type]
albums=albums, # type: ignore[arg-type]
storage=storage, # type: ignore[arg-type]
tag_reader=FakeTagReader(tags), # type: ignore[arg-type]
fingerprinter=FakeFingerprinter(fp), # type: ignore[arg-type]
acoustid=FakeAcoustId(match), # type: ignore[arg-type]
cover_extractor=extractor, # type: ignore[arg-type]
cover_provider=provider, # type: ignore[arg-type]
)
return service, albums, storage
async def test_cover_extracted_from_embedded_art() -> None:
track = _track()
extractor = FakeCoverExtractor(_PNG)
provider = FakeCoverProvider(_JPG)
service, albums, storage = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
extractor=extractor, provider=provider,
)
await service.enrich(track.id)
assert extractor.calls == 1
assert provider.calls == 0 # embedded art wins → no network fetch
assert len(albums.covers) == 1
key = next(iter(albums.covers.values()))
assert key.startswith("covers/") and key.endswith(".png")
assert storage.saved == [key]
async def test_cover_falls_back_to_archive() -> None:
track = _track()
extractor = FakeCoverExtractor(None) # no embedded art
provider = FakeCoverProvider(_JPG)
match = RecordingMatch(acoustid="ac", score=1.0, release_group_mbid="rg-123", album="The Wall")
fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
service, albums, storage = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
match=match, fp=fp, extractor=extractor, provider=provider,
)
await service.enrich(track.id)
assert extractor.calls == 1
assert provider.calls == 1
key = next(iter(albums.covers.values()))
assert key.endswith(".jpg")
assert storage.saved == [key]
async def test_cover_not_fetched_without_release_group() -> None:
track = _track()
provider = FakeCoverProvider(_JPG)
service, albums, _ = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
extractor=FakeCoverExtractor(None), provider=provider,
)
await service.enrich(track.id)
assert provider.calls == 0 # no release-group mbid → nothing to look up
assert albums.covers == {}
async def test_existing_cover_is_not_overwritten() -> None:
track = _track()
extractor = FakeCoverExtractor(_PNG)
service, albums, storage = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
extractor=extractor, existing_cover="covers/old.jpg",
)
await service.enrich(track.id)
assert extractor.calls == 0 # album already has a cover → skip entirely
assert albums.covers == {}
assert storage.saved == []
async def test_cover_skipped_when_no_album() -> None:
track = _track()
extractor = FakeCoverExtractor(_PNG)
# no album tag and no match → no album resolved → no cover work
service, _albums, storage = _cover_service(track=track, extractor=extractor)
await service.enrich(track.id)
assert extractor.calls == 0
assert storage.saved == []
+1 -3
View File
@@ -175,9 +175,7 @@ async def test_stream_range(api: AsyncClient) -> None:
async def test_stream_not_found(api: AsyncClient) -> None: async def test_stream_not_found(api: AsyncClient) -> None:
token = await _login(api) token = await _login(api)
resp = await api.get( resp = await api.get(f"/api/v1/stream/00000000-0000-0000-0000-000000000000?token={token}")
f"/api/v1/stream/00000000-0000-0000-0000-000000000000?token={token}"
)
assert resp.status_code == 404 assert resp.status_code == 404