feat: cover-art pipeline (§1D)
Resolve, store and serve album cover art.
Sources (tag-first, mirroring enrichment): embedded artwork extracted
offline via mutagen (ID3 APIC / FLAC+OGG Picture / MP4 covr), then Cover
Art Archive by release-group MBID as a network fallback. Resolution runs
inside MetadataEnrichmentService after album resolution, only when the
album has no cover yet (idempotent, never overwrites), and is best-effort
so a cover failure never affects enrichment status.
- CoverArt value object + CoverArtExtractor/CoverArtProvider ports
- MutagenCoverExtractor + CoverArtArchiveClient adapters
- AcoustID parser now captures release_group_mbid
- Covers stored via FileStorage at covers/{album_id}.{ext} (local + S3)
- AlbumRepository.set_cover_path
- Serve real covers: GET /api/v1/albums/{id}/cover and GET /api/v1/tracks/{id}/cover
(StreamUser, ?token=), plus Subsonic getCoverArt (placeholder fallback)
- has_cover flag on AlbumOut/TrackOut
- coverart_enabled / coverart_base_url settings
- tests: cover-resolution unit tests, release_group parsing, and the DB-backed
test_cover_api.py (139 green via make test-api)
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -66,7 +66,7 @@ class AcoustIdHttpClient:
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
except (httpx.HTTPError, ValueError):
|
||||
except httpx.HTTPError, ValueError:
|
||||
log.warning("acoustid_lookup_failed")
|
||||
return None
|
||||
|
||||
@@ -100,6 +100,7 @@ def _parse_best_match(payload: object) -> RecordingMatch | None:
|
||||
score = float(best.get("score", 0.0))
|
||||
|
||||
recording_mbid: str | None = None
|
||||
release_group_mbid: str | None = None
|
||||
title: str | None = None
|
||||
artist: str | None = None
|
||||
album: str | None = None
|
||||
@@ -115,13 +116,17 @@ def _parse_best_match(payload: object) -> RecordingMatch | None:
|
||||
artist = name if isinstance(name, str) else None
|
||||
groups = rec.get("releasegroups")
|
||||
if isinstance(groups, list) and groups and isinstance(groups[0], dict):
|
||||
gtitle = groups[0].get("title")
|
||||
group = groups[0]
|
||||
gtitle = group.get("title")
|
||||
album = gtitle if isinstance(gtitle, str) else None
|
||||
gid = group.get("id")
|
||||
release_group_mbid = gid if isinstance(gid, str) else None
|
||||
|
||||
return RecordingMatch(
|
||||
acoustid=acoustid,
|
||||
score=score,
|
||||
recording_mbid=recording_mbid,
|
||||
release_group_mbid=release_group_mbid,
|
||||
title=title,
|
||||
artist=artist,
|
||||
album=album,
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
"""MutagenCoverExtractor — pulls embedded cover art from a local audio file.
|
||||
|
||||
The offline-first cover source (mirrors the tag pre-pass): a well-tagged file
|
||||
often already carries front-cover artwork (ID3 ``APIC``, FLAC/OGG picture
|
||||
blocks, MP4 ``covr``). We read it without any network call. Parsing is blocking,
|
||||
so it runs in a worker thread. Any failure degrades to ``None`` — never raises.
|
||||
|
||||
mutagen ships no type stubs, so its objects are handled as ``Any`` and accessed
|
||||
defensively (``getattr``) — the format zoo doesn't fit one static shape anyway.
|
||||
"""
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import anyio
|
||||
from mutagen import File as MutagenFile # type: ignore[attr-defined]
|
||||
from mutagen.flac import Picture
|
||||
from mutagen.mp4 import MP4Cover
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.domain.entities.cover import CoverArt
|
||||
|
||||
log = get_logger(__name__)
|
||||
|
||||
# mutagen reports an MP4 cover's format as an integer flag rather than a
# content type, so the known flags are translated to MIME strings here.
_MP4_FORMATS: dict[int, str] = {
    MP4Cover.FORMAT_JPEG: "image/jpeg",
    MP4Cover.FORMAT_PNG: "image/png",
}

# "type" value that APIC frames and Picture blocks use for the front cover.
_FRONT_COVER = 3
|
||||
|
||||
|
||||
class MutagenCoverExtractor:
    """Implements :class:`app.domain.ports.CoverArtExtractor`.

    Reads embedded artwork from a local audio file through mutagen, trying
    each storage location in turn: FLAC picture blocks, ID3 ``APIC`` frames,
    the MP4 ``covr`` atom, then Vorbis ``METADATA_BLOCK_PICTURE`` comments.
    """

    async def extract(self, path: Path) -> CoverArt | None:
        """Return the cover embedded in *path*, or ``None`` when there is none.

        The blocking mutagen parse runs in a worker thread. Any exception is
        logged as ``cover_extract_failed`` and converted to ``None``, so this
        method never raises.
        """
        try:
            return await anyio.to_thread.run_sync(self._extract_sync, path)
        except Exception:
            log.warning("cover_extract_failed", path=str(path))
            return None

    def _extract_sync(self, path: Path) -> CoverArt | None:
        """Blocking extraction; returns the first usable cover found.

        Exceptions are not handled here: :meth:`extract` is the boundary that
        turns them into ``None``.
        """
        audio: Any = MutagenFile(str(path))
        if audio is None:
            return None

        # FLAC / OGG-FLAC: typed picture blocks on the file object.
        pictures = getattr(audio, "pictures", None)
        if pictures:
            cover = _from_picture(_front_or_first(pictures))
            if cover is not None:
                return cover

        tags = audio.tags
        if tags is None:
            return None

        # MP3 / anything with ID3 frames: APIC frames keyed as "APIC:...".
        # Matched by class name so no ID3-specific import is needed.
        apics = [frame for frame in tags.values() if frame.__class__.__name__ == "APIC"]
        if apics:
            cover = _from_picture(_front_or_first(apics))
            if cover is not None:
                return cover

        get = getattr(tags, "get", None)
        if get is None:
            return None

        # MP4 / M4A: "covr" atom holds a list of MP4Cover (a bytes subclass).
        covr = get("covr")
        if covr:
            mp4_cover = covr[0]
            content_type = _MP4_FORMATS.get(getattr(mp4_cover, "imageformat", -1), "image/jpeg")
            return CoverArt(data=bytes(mp4_cover), content_type=content_type)

        # OGG Vorbis: base64 picture blocks in METADATA_BLOCK_PICTURE. Decode
        # every entry and apply the same front-cover preference as the FLAC
        # and ID3 paths, instead of trusting whichever entry happens to be
        # first (which may be a back cover or fail to decode).
        block = get("metadata_block_picture")
        if block:
            decoded = [pic for pic in map(_decode_vorbis_picture, block) if pic is not None]
            cover = _from_picture(_front_or_first(decoded))
            if cover is not None:
                return cover

        return None
|
||||
|
||||
|
||||
def _from_picture(picture: Any) -> CoverArt | None:
|
||||
"""Build a :class:`CoverArt` from a mutagen picture/APIC frame, or ``None``."""
|
||||
if picture is None:
|
||||
return None
|
||||
data = getattr(picture, "data", None)
|
||||
if not data:
|
||||
return None
|
||||
mime = getattr(picture, "mime", None) or "image/jpeg"
|
||||
return CoverArt(data=bytes(data), content_type=str(mime))
|
||||
|
||||
|
||||
def _front_or_first(pictures: list[Any]) -> Any:
|
||||
"""Prefer the front-cover picture (type 3), else the first available."""
|
||||
for pic in pictures:
|
||||
if getattr(pic, "type", None) == _FRONT_COVER:
|
||||
return pic
|
||||
return pictures[0] if pictures else None
|
||||
|
||||
|
||||
def _decode_vorbis_picture(encoded: str) -> Any:
|
||||
try:
|
||||
return Picture(base64.b64decode(encoded)) # type: ignore[no-untyped-call]
|
||||
except Exception:
|
||||
return None
|
||||
@@ -0,0 +1,83 @@
|
||||
"""CoverArtArchiveClient — fetches front cover art from the Cover Art Archive.
|
||||
|
||||
The network fallback when a file carries no embedded artwork: given a
|
||||
MusicBrainz **release-group** id (supplied by the AcoustID lookup), request the
|
||||
front image from ``coverartarchive.org``. The CAA redirects to the Internet
|
||||
Archive, so redirects are followed. The 500px thumbnail (``front-500``) keeps payloads small.
|
||||
|
||||
Graceful degradation (CLAUDE.md): no release-group id → never called; any
|
||||
network/HTTP error (incl. 404 "no cover") → returns ``None``, never raises. A
|
||||
small inter-call delay respects the shared MusicBrainz/CAA infrastructure.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.domain.entities.cover import CoverArt
|
||||
|
||||
log = get_logger(__name__)
|
||||
|
||||
_DEFAULT_BASE_URL = "https://coverartarchive.org"
|
||||
_TIMEOUT_SECONDS = 15.0
|
||||
_MIN_INTERVAL_SECONDS = 1.0 # CAA piggybacks on MusicBrainz infra; stay polite
|
||||
_MAX_BYTES = 10 * 1024 * 1024 # ignore absurdly large images
|
||||
|
||||
|
||||
class CoverArtArchiveClient:
    """Implements :class:`app.domain.ports.CoverArtProvider`.

    Fetches the 500px front image of a MusicBrainz release group from the
    Cover Art Archive. Every failure mode (disabled, missing id, 404, network
    or HTTP error, malformed URL, oversized or non-image body) yields ``None``.
    """

    # Class-level on purpose: the minimum interval between calls is shared by
    # every instance rather than tracked per client.
    _throttle_lock = asyncio.Lock()
    _last_call_monotonic = 0.0

    def __init__(
        self,
        *,
        user_agent: str,
        enabled: bool = True,
        base_url: str = _DEFAULT_BASE_URL,
    ) -> None:
        """Store the settings; a trailing ``/`` on *base_url* is stripped."""
        self._user_agent = user_agent
        self._enabled = enabled
        self._base_url = base_url.rstrip("/")

    def is_available(self) -> bool:
        """Return whether the provider is enabled."""
        return self._enabled

    async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None:
        """Return the front cover for *release_group_mbid*, or ``None``.

        The body is streamed so that the ``_MAX_BYTES`` cap bounds how much is
        downloaded and held in memory, rather than being checked only after
        the whole response has been buffered. Never raises for network, HTTP
        or URL errors; those are logged as ``coverart_fetch_failed``.
        """
        if not self._enabled or not release_group_mbid:
            return None

        url = f"{self._base_url}/release-group/{release_group_mbid}/front-500"
        try:
            await self._throttle()
            async with httpx.AsyncClient(
                timeout=_TIMEOUT_SECONDS,
                follow_redirects=True,
                headers={"User-Agent": self._user_agent},
            ) as client:
                async with client.stream("GET", url) as resp:
                    if resp.status_code == 404:
                        return None  # no cover for this release group — normal, not an error
                    resp.raise_for_status()
                    content_type = (
                        resp.headers.get("content-type", "image/jpeg").split(";")[0].strip()
                    )
                    # Reject non-images before paying for the body download.
                    if not content_type.startswith("image/"):
                        return None
                    data = await self._read_capped(resp)
        except (httpx.HTTPError, httpx.InvalidURL):
            # InvalidURL is not an HTTPError subclass; a malformed id must
            # still degrade to None instead of escaping to the caller.
            log.warning("coverart_fetch_failed", release_group=release_group_mbid)
            return None

        if not data:
            return None  # empty body, or larger than _MAX_BYTES
        return CoverArt(data=data, content_type=content_type)

    @staticmethod
    async def _read_capped(resp: httpx.Response) -> bytes | None:
        """Read a streamed body, returning ``None`` once it exceeds ``_MAX_BYTES``."""
        buffer = bytearray()
        async for chunk in resp.aiter_bytes():
            buffer += chunk
            if len(buffer) > _MAX_BYTES:
                return None
        return bytes(buffer)

    @classmethod
    async def _throttle(cls) -> None:
        """Sleep as needed so calls are at least ``_MIN_INTERVAL_SECONDS`` apart.

        The lock is held while sleeping, so concurrent callers queue up and
        are released one interval at a time.
        """
        async with cls._throttle_lock:
            elapsed = time.monotonic() - cls._last_call_monotonic
            wait = _MIN_INTERVAL_SECONDS - elapsed
            if wait > 0:
                await asyncio.sleep(wait)
            cls._last_call_monotonic = time.monotonic()
|
||||
@@ -41,7 +41,7 @@ class FpcalcFingerprinter:
|
||||
)
|
||||
async with asyncio.timeout(_TIMEOUT_SECONDS):
|
||||
stdout, _stderr = await proc.communicate()
|
||||
except (TimeoutError, OSError):
|
||||
except TimeoutError, OSError:
|
||||
log.warning("fpcalc_failed", path=str(path))
|
||||
return None
|
||||
|
||||
@@ -53,7 +53,7 @@ class FpcalcFingerprinter:
|
||||
data = json.loads(stdout)
|
||||
fingerprint = str(data["fingerprint"])
|
||||
duration = round(float(data["duration"]))
|
||||
except (json.JSONDecodeError, KeyError, ValueError):
|
||||
except json.JSONDecodeError, KeyError, ValueError:
|
||||
log.warning("fpcalc_bad_output", path=str(path))
|
||||
return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user