"""MutagenCoverExtractor — pulls embedded cover art from a local audio file. The offline-first cover source (mirrors the tag pre-pass): a well-tagged file often already carries front-cover artwork (ID3 ``APIC``, FLAC/OGG picture blocks, MP4 ``covr``). We read it without any network call. Parsing is blocking, so it runs in a worker thread. Any failure degrades to ``None`` — never raises. mutagen ships no type stubs, so its objects are handled as ``Any`` and accessed defensively (``getattr``) — the format zoo doesn't fit one static shape anyway. """ import base64 from pathlib import Path from typing import Any import anyio from mutagen import File as MutagenFile # type: ignore[attr-defined] from mutagen.flac import Picture from mutagen.mp4 import MP4Cover from app.core.logging import get_logger from app.domain.entities.cover import CoverArt log = get_logger(__name__) # MP4 cover format flag → MIME (mutagen exposes an int, not a content type). _MP4_FORMATS: dict[int, str] = { MP4Cover.FORMAT_JPEG: "image/jpeg", MP4Cover.FORMAT_PNG: "image/png", } _FRONT_COVER = 3 # APIC/Picture "type" value for the front cover class MutagenCoverExtractor: """Implements :class:`app.domain.ports.CoverArtExtractor`.""" async def extract(self, path: Path) -> CoverArt | None: try: return await anyio.to_thread.run_sync(self._extract_sync, path) except Exception: log.warning("cover_extract_failed", path=str(path)) return None def _extract_sync(self, path: Path) -> CoverArt | None: audio: Any = MutagenFile(str(path)) if audio is None: return None # FLAC / OGG-FLAC: typed picture blocks on the file object. pictures = getattr(audio, "pictures", None) if pictures: cover = _from_picture(_front_or_first(pictures)) if cover is not None: return cover tags = audio.tags if tags is None: return None # MP3 / anything with ID3 frames: APIC frames keyed as "APIC:...". apics = [frame for frame in tags.values() if frame.__class__.__name__ == "APIC"] if apics: cover = _from_picture(_front_or_first(apics)) if cover is not None: return cover get = getattr(tags, "get", None) if get is None: return None # MP4 / M4A: "covr" atom holds a list of MP4Cover (a bytes subclass). covr = get("covr") if covr: mp4_cover = covr[0] content_type = _MP4_FORMATS.get(getattr(mp4_cover, "imageformat", -1), "image/jpeg") return CoverArt(data=bytes(mp4_cover), content_type=content_type) # OGG Vorbis: base64 picture block in METADATA_BLOCK_PICTURE. block = get("metadata_block_picture") if block: cover = _from_picture(_decode_vorbis_picture(block[0])) if cover is not None: return cover return None def _from_picture(picture: Any) -> CoverArt | None: """Build a :class:`CoverArt` from a mutagen picture/APIC frame, or ``None``.""" if picture is None: return None data = getattr(picture, "data", None) if not data: return None mime = getattr(picture, "mime", None) or "image/jpeg" return CoverArt(data=bytes(data), content_type=str(mime)) def _front_or_first(pictures: list[Any]) -> Any: """Prefer the front-cover picture (type 3), else the first available.""" for pic in pictures: if getattr(pic, "type", None) == _FRONT_COVER: return pic return pictures[0] if pictures else None def _decode_vorbis_picture(encoded: str) -> Any: try: return Picture(base64.b64decode(encoded)) # type: ignore[no-untyped-call] except Exception: return None