"""Unit tests for MetadataEnrichmentService — DB-free, in-memory fakes. Covers the §6.2 orchestration contract: tag-first merge, AcoustID fallback, artist/album resolution, status transitions, and the hard invariants (``manual`` untouched, graceful degradation, idempotent gap-fill). """ import datetime as dt import uuid from collections.abc import AsyncIterator from contextlib import asynccontextmanager from pathlib import Path import pytest from app.application.metadata_service import MetadataEnrichmentService from app.domain.entities import Artist, Track from app.domain.entities.album import Album from app.domain.entities.cover import CoverArt from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch pytestmark = pytest.mark.asyncio _UNKNOWN = "Unknown Artist" def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Track: now = dt.datetime.now(dt.UTC) return Track( id=uuid.uuid4(), title=title, artist_id=uuid.uuid4(), # the "Unknown Artist" id album_id=None, storage_uri="tracks/aa/song.mp3", file_format="mp3", file_size=123, source="upload", source_id="deadbeef", duration_seconds=None, genre=None, year=None, metadata_status=metadata_status, created_at=now, updated_at=now, ) class FakeTrackRepo: def __init__(self, track: Track | None) -> None: self._track = track self.applied: dict[str, object] | None = None async def get_by_id(self, track_id: uuid.UUID) -> Track | None: return self._track async def apply_enrichment(self, track_id: uuid.UUID, **kw: object) -> Track: self.applied = kw return self._track # type: ignore[return-value] class FakeArtistRepo: def __init__(self) -> None: self.created: list[str] = [] async def get_or_create(self, name: str) -> Artist: self.created.append(name) now = dt.datetime.now(dt.UTC) return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) class FakeAlbumRepo: def __init__(self, *, cover_path: str | None = None) -> None: self.created: list[tuple[str, uuid.UUID]] = [] self.covers: dict[uuid.UUID, str] = {} self._existing_cover = cover_path async def get_or_create( self, *, title: str, artist_id: uuid.UUID, year: int | None, musicbrainz_id: str | None ) -> Album: self.created.append((title, artist_id)) now = dt.datetime.now(dt.UTC) return Album( id=uuid.uuid4(), title=title, artist_id=artist_id, year=year, cover_path=self._existing_cover, musicbrainz_id=musicbrainz_id, created_at=now, updated_at=now, ) async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: self.covers[album_id] = cover_path class FakeStorage: def __init__(self) -> None: self.saved: list[str] = [] @asynccontextmanager async def as_local_path(self, key: str) -> AsyncIterator[Path]: yield Path("/tmp") / key async def save_file(self, key: str, src_path: Path) -> int: self.saved.append(key) return 1 class FakeCoverExtractor: def __init__(self, cover: CoverArt | None) -> None: self._cover = cover self.calls = 0 async def extract(self, path: Path) -> CoverArt | None: self.calls += 1 return self._cover class FakeCoverProvider: def __init__(self, cover: CoverArt | None, *, available: bool = True) -> None: self._cover = cover self._available = available self.calls = 0 def is_available(self) -> bool: return self._available async def fetch_release_group(self, release_group_mbid: str) -> CoverArt | None: self.calls += 1 return self._cover class FakeTagReader: def __init__(self, tags: AudioTags | None) -> None: self._tags = tags async def read(self, path: Path) -> AudioTags | None: return self._tags class FakeFingerprinter: def __init__(self, fp: Fingerprint | None, *, available: bool = True) -> None: self._fp = fp self._available = available def is_available(self) -> bool: return self._available async def calculate(self, path: Path) -> Fingerprint | None: return self._fp class FakeAcoustId: def __init__(self, match: RecordingMatch | None, *, available: bool = True) -> None: self._match = match self._available = available self.calls = 0 def is_available(self) -> bool: return self._available async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: self.calls += 1 return self._match def _service( *, track: Track | None, tags: AudioTags | None = None, fp: Fingerprint | None = None, match: RecordingMatch | None = None, fp_available: bool = True, acoustid_available: bool = True, ) -> tuple[MetadataEnrichmentService, FakeTrackRepo, FakeArtistRepo, FakeAlbumRepo, FakeAcoustId]: tracks = FakeTrackRepo(track) artists = FakeArtistRepo() albums = FakeAlbumRepo() acoustid = FakeAcoustId(match, available=acoustid_available) service = MetadataEnrichmentService( tracks=tracks, # type: ignore[arg-type] artists=artists, # type: ignore[arg-type] albums=albums, # type: ignore[arg-type] storage=FakeStorage(), # type: ignore[arg-type] tag_reader=FakeTagReader(tags), # type: ignore[arg-type] fingerprinter=FakeFingerprinter(fp, available=fp_available), # type: ignore[arg-type] acoustid=acoustid, # type: ignore[arg-type] ) return service, tracks, artists, albums, acoustid async def test_tags_only_enriches_and_relinks_artist_and_album() -> None: track = _track() tags = AudioTags( title="Real Title", artist="Pink Floyd", album="The Wall", genre="Rock", year=1979, track_number=1, duration_seconds=222, ) service, tracks, artists, albums, acoustid = _service(track=track, tags=tags) result = await service.enrich(track.id) assert result.status == "enriched" assert acoustid.calls == 0 # no fingerprint → no lookup needed assert "Pink Floyd" in artists.created assert albums.created and albums.created[0][0] == "The Wall" applied = tracks.applied assert applied is not None assert applied["title"] == "Real Title" assert applied["genre"] == "Rock" assert applied["year"] == 1979 assert applied["track_number"] == 1 assert applied["duration_seconds"] == 222 assert applied["metadata_status"] == "enriched" async def test_manual_track_is_never_touched() -> None: track = _track(metadata_status="manual") service, tracks, _, _, _ = _service(track=track, tags=AudioTags(artist="X")) result = await service.enrich(track.id) assert result.status == "skipped" assert tracks.applied is None # nothing written async def test_missing_track_is_a_clean_noop() -> None: service, tracks, _, _, _ = _service(track=None) result = await service.enrich(uuid.uuid4()) assert result.status == "skipped" assert tracks.applied is None async def test_nothing_found_marks_failed() -> None: track = _track() # No tags, no fingerprint → no identity at all. service, tracks, artists, albums, _acoustid = _service(track=track, tags=None, fp=None) result = await service.enrich(track.id) assert result.status == "failed" assert artists.created == [] # artist stays the original unknown assert albums.created == [] applied = tracks.applied assert applied is not None assert applied["artist_id"] == track.artist_id # fallback kept assert applied["metadata_status"] == "failed" async def test_acoustid_path_fills_when_tags_absent() -> None: track = _track() fp = Fingerprint(fingerprint="AQAAxyz", duration_seconds=200) match = RecordingMatch( acoustid="acoustid-uuid", score=0.95, recording_mbid="mb-recording-id", title="Identified Title", artist="Daft Punk", album="Discovery", ) service, tracks, artists, _albums, acoustid = _service( track=track, tags=None, fp=fp, match=match ) result = await service.enrich(track.id) assert result.status == "enriched" assert result.matched_mbid == "mb-recording-id" assert acoustid.calls == 1 applied = tracks.applied assert applied is not None assert applied["title"] == "Identified Title" assert applied["musicbrainz_id"] == "mb-recording-id" assert applied["acoustid_fingerprint"] == "acoustid-uuid" assert "Daft Punk" in artists.created async def test_tags_win_over_acoustid_for_overlapping_fields() -> None: track = _track() fp = Fingerprint(fingerprint="AQAA", duration_seconds=200) tags = AudioTags(title="Tagged Title", artist="Tagged Artist") match = RecordingMatch( acoustid="aid", score=0.9, recording_mbid="mbid", title="AcoustID Title", artist="AcoustID Artist", ) service, tracks, artists, _albums, _acoustid = _service( track=track, tags=tags, fp=fp, match=match ) await service.enrich(track.id) applied = tracks.applied assert applied is not None assert applied["title"] == "Tagged Title" # tag preferred assert "Tagged Artist" in artists.created # but the MBID from AcoustID is still captured assert applied["musicbrainz_id"] == "mbid" async def test_fingerprint_skipped_when_acoustid_unavailable() -> None: track = _track() fp = Fingerprint(fingerprint="AQAA", duration_seconds=200) service, _tracks, _artists, _albums, acoustid = _service( track=track, tags=AudioTags(artist="Tagged"), fp=fp, acoustid_available=False ) result = await service.enrich(track.id) # tags still enrich, but no AcoustID call is attempted assert acoustid.calls == 0 assert result.status == "enriched" # -- cover-art resolution ----------------------------------------------------- _PNG = CoverArt(data=b"\x89PNG\r\n", content_type="image/png") _JPG = CoverArt(data=b"\xff\xd8\xff", content_type="image/jpeg") def _cover_service( *, track: Track, tags: AudioTags | None = None, match: RecordingMatch | None = None, fp: Fingerprint | None = None, extractor: FakeCoverExtractor | None = None, provider: FakeCoverProvider | None = None, existing_cover: str | None = None, ) -> tuple[MetadataEnrichmentService, FakeAlbumRepo, FakeStorage]: albums = FakeAlbumRepo(cover_path=existing_cover) storage = FakeStorage() service = MetadataEnrichmentService( tracks=FakeTrackRepo(track), # type: ignore[arg-type] artists=FakeArtistRepo(), # type: ignore[arg-type] albums=albums, # type: ignore[arg-type] storage=storage, # type: ignore[arg-type] tag_reader=FakeTagReader(tags), # type: ignore[arg-type] fingerprinter=FakeFingerprinter(fp), # type: ignore[arg-type] acoustid=FakeAcoustId(match), # type: ignore[arg-type] cover_extractor=extractor, # type: ignore[arg-type] cover_provider=provider, # type: ignore[arg-type] ) return service, albums, storage async def test_cover_extracted_from_embedded_art() -> None: track = _track() extractor = FakeCoverExtractor(_PNG) provider = FakeCoverProvider(_JPG) service, albums, storage = _cover_service( track=track, tags=AudioTags(album="The Wall", artist="PF"), extractor=extractor, provider=provider, ) await service.enrich(track.id) assert extractor.calls == 1 assert provider.calls == 0 # embedded art wins → no network fetch assert len(albums.covers) == 1 key = next(iter(albums.covers.values())) assert key.startswith("covers/") and key.endswith(".png") assert storage.saved == [key] async def test_cover_falls_back_to_archive() -> None: track = _track() extractor = FakeCoverExtractor(None) # no embedded art provider = FakeCoverProvider(_JPG) match = RecordingMatch(acoustid="ac", score=1.0, release_group_mbid="rg-123", album="The Wall") fp = Fingerprint(fingerprint="AQAA", duration_seconds=200) service, albums, storage = _cover_service( track=track, tags=AudioTags(album="The Wall", artist="PF"), match=match, fp=fp, extractor=extractor, provider=provider, ) await service.enrich(track.id) assert extractor.calls == 1 assert provider.calls == 1 key = next(iter(albums.covers.values())) assert key.endswith(".jpg") assert storage.saved == [key] async def test_cover_not_fetched_without_release_group() -> None: track = _track() provider = FakeCoverProvider(_JPG) service, albums, _ = _cover_service( track=track, tags=AudioTags(album="The Wall", artist="PF"), extractor=FakeCoverExtractor(None), provider=provider, ) await service.enrich(track.id) assert provider.calls == 0 # no release-group mbid → nothing to look up assert albums.covers == {} async def test_existing_cover_is_not_overwritten() -> None: track = _track() extractor = FakeCoverExtractor(_PNG) service, albums, storage = _cover_service( track=track, tags=AudioTags(album="The Wall", artist="PF"), extractor=extractor, existing_cover="covers/old.jpg", ) await service.enrich(track.id) assert extractor.calls == 0 # album already has a cover → skip entirely assert albums.covers == {} assert storage.saved == [] async def test_cover_skipped_when_no_album() -> None: track = _track() extractor = FakeCoverExtractor(_PNG) # no album tag and no match → no album resolved → no cover work service, _albums, storage = _cover_service(track=track, extractor=extractor) await service.enrich(track.id) assert extractor.calls == 0 assert storage.saved == []