"""MetadataEnrichmentService — the §6.2 pipeline orchestrator. Order (tag-first): embedded tags → Chromaprint fingerprint → AcoustID lookup. Tags fix the common well-tagged case offline; AcoustID identifies the rest and supplies a MusicBrainz id. The result updates the track and sets ``metadata_status`` to ``enriched`` (identity found) or ``failed`` (nothing). Invariants (plan §6.2, CLAUDE.md): - **Never touch ``manual``** — a user-edited track is returned untouched. - **Graceful degradation** — every external step is wrapped; one failure (no fpcalc, no API key, service down) degrades the result, never crashes. - **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases. """ import tempfile import uuid from dataclasses import dataclass from pathlib import Path from app.core.logging import get_logger from app.domain.entities.album import Album from app.domain.entities.cover import CoverArt from app.domain.entities.metadata import AudioTags, RecordingMatch from app.domain.ports import ( AcoustIdClient, AlbumRepository, ArtistRepository, AudioFingerprinter, AudioTagReader, CoverArtExtractor, CoverArtProvider, FileStorage, TrackRepository, ) log = get_logger(__name__) _UNKNOWN_ARTIST = "Unknown Artist" @dataclass(frozen=True) class EnrichmentResult: track_id: uuid.UUID status: str # "enriched" | "failed" | "skipped" matched_mbid: str | None = None class MetadataEnrichmentService: def __init__( self, *, tracks: TrackRepository, artists: ArtistRepository, albums: AlbumRepository, storage: FileStorage, tag_reader: AudioTagReader, fingerprinter: AudioFingerprinter, acoustid: AcoustIdClient, cover_extractor: CoverArtExtractor | None = None, cover_provider: CoverArtProvider | None = None, ) -> None: self._tracks = tracks self._artists = artists self._albums = albums self._storage = storage self._tag_reader = tag_reader self._fingerprinter = fingerprinter self._acoustid = acoustid self._cover_extractor = cover_extractor self._cover_provider = cover_provider async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult: track = await self._tracks.get_by_id(track_id) if track is None: log.info("enrich_track_missing", track_id=str(track_id)) return EnrichmentResult(track_id=track_id, status="skipped") if track.metadata_status == "manual": log.info("enrich_skip_manual", track_id=str(track_id)) return EnrichmentResult(track_id=track_id, status="skipped") tags = await self._read_local(track.storage_uri) match = await self._identify(track.storage_uri) # Merge sources: prefer embedded tags, fall back to the AcoustID match. # ``title`` is guaranteed non-None by the existing track title; the rest # stay None when neither source has them. tag_title = tags.title if tags else None tag_artist = tags.artist if tags else None tag_album = tags.album if tags else None title = _opt_str(tag_title, match.title if match else None) or track.title artist_name = _opt_str(tag_artist, match.artist if match else None) album_title = _opt_str(tag_album, match.album if match else None) year = _first_int(tags.year if tags else None, match.year if match else None) genre = tags.genre if tags else None track_number = tags.track_number if tags else None duration = _first_int( tags.duration_seconds if tags else None, track.duration_seconds, ) bitrate = tags.bitrate if tags else None mbid = match.recording_mbid if match else None acoustid_id = match.acoustid if match else None artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id) album = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid) album_id = album.id if album is not None else None if album is not None: await self._resolve_cover( album, storage_uri=track.storage_uri, release_group_mbid=match.release_group_mbid if match else None, ) identified = bool(artist_name) or album_id is not None or mbid is not None status = "enriched" if identified else "failed" await self._tracks.apply_enrichment( track_id, title=title, artist_id=artist_id, album_id=album_id, genre=genre, year=year, track_number=track_number, duration_seconds=duration, bitrate=bitrate, acoustid_fingerprint=acoustid_id, musicbrainz_id=mbid, metadata_status=status, ) log.info("enrich_complete", track_id=str(track_id), status=status, mbid=mbid) return EnrichmentResult(track_id=track_id, status=status, matched_mbid=mbid) async def _read_local(self, storage_uri: str) -> AudioTags | None: try: async with self._storage.as_local_path(storage_uri) as path: return await self._tag_reader.read(path) except Exception: log.warning("enrich_tag_step_failed", storage_uri=storage_uri) return None async def _identify(self, storage_uri: str) -> RecordingMatch | None: if not self._acoustid.is_available() or not self._fingerprinter.is_available(): return None try: async with self._storage.as_local_path(storage_uri) as path: fingerprint = await self._fingerprinter.calculate(path) if fingerprint is None: return None return await self._acoustid.lookup(fingerprint) except Exception: log.warning("enrich_identify_step_failed", storage_uri=storage_uri) return None async def _resolve_artist(self, name: str | None, *, fallback: uuid.UUID) -> uuid.UUID: if not name or name == _UNKNOWN_ARTIST: return fallback artist = await self._artists.get_or_create(name) return artist.id async def _resolve_album( self, title: str | None, *, artist_id: uuid.UUID, year: int | None, mbid: str | None, ) -> Album | None: if not title: return None return await self._albums.get_or_create( title=title, artist_id=artist_id, year=year, musicbrainz_id=mbid, ) async def _resolve_cover( self, album: Album, *, storage_uri: str, release_group_mbid: str | None, ) -> None: """Fill in an album cover when it has none. Source order mirrors the tag-first pipeline: embedded artwork (offline) → Cover Art Archive (network, by release-group). Best-effort — any failure is swallowed so a missing cover never affects enrichment status.""" if album.cover_path: return # already has one — never overwrite (idempotent) cover = await self._extract_cover(storage_uri) if cover is None: cover = await self._fetch_cover(release_group_mbid) if cover is None: return try: key = await self._save_cover(album.id, cover) await self._albums.set_cover_path(album.id, key) log.info("cover_resolved", album_id=str(album.id), content_type=cover.content_type) except Exception: log.warning("cover_save_failed", album_id=str(album.id)) async def _extract_cover(self, storage_uri: str) -> CoverArt | None: if self._cover_extractor is None: return None try: async with self._storage.as_local_path(storage_uri) as path: return await self._cover_extractor.extract(path) except Exception: log.warning("cover_extract_step_failed", storage_uri=storage_uri) return None async def _fetch_cover(self, release_group_mbid: str | None) -> CoverArt | None: if self._cover_provider is None or not release_group_mbid: return None if not self._cover_provider.is_available(): return None try: return await self._cover_provider.fetch_release_group(release_group_mbid) except Exception: log.warning("cover_fetch_step_failed", release_group=release_group_mbid) return None async def _save_cover(self, album_id: uuid.UUID, cover: CoverArt) -> str: key = f"covers/{album_id}.{cover.extension}" with tempfile.NamedTemporaryFile(suffix=f".{cover.extension}") as tmp: tmp.write(cover.data) tmp.flush() await self._storage.save_file(key, Path(tmp.name)) return key def _opt_str(*values: str | None) -> str | None: for value in values: if value: return value return None def _first_int(*values: int | None) -> int | None: for value in values: if value is not None: return value return None