"""Enrichment tests against a real audio file (``tests/fixtures/``). The fixture "Scarlet Fire" by Otis McDonald carries *junk* embedded tags (``Sound_13958`` / ``Music Track`` / ``Музыка``) yet is identified by AcoustID with ~0.99 confidence. That makes it the real-world reproduction of the "uploaded a track, got the wrong name/artist" bug: tag reading must be exact, and a high-confidence AcoustID match must override the junk tags. Two layers: - The tag-reader test is offline and deterministic — it always runs. - The AcoustID/identity tests need the ``fpcalc`` binary, an AcoustID API key, and network. They *skip* (never fail) when those aren't present, honouring the project rule that the suite never hard-requires network. They do run inside the api/worker container (``make test-api``), which ships fpcalc + the key. """ import uuid from collections.abc import AsyncIterator from contextlib import asynccontextmanager from dataclasses import dataclass, field from datetime import UTC, datetime from pathlib import Path import pytest from app.application.metadata_service import MetadataEnrichmentService from app.core.config import get_settings from app.domain.entities.album import Album from app.domain.entities.track import Artist, Track from app.infrastructure.metadata.acoustid import AcoustIdHttpClient from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter from app.infrastructure.metadata.tags import MutagenTagReader pytestmark = pytest.mark.asyncio FIXTURE = Path(__file__).parent / "fixtures" / "scarlet_fire_otis_mcdonald.mp3" _settings = get_settings() _fpcalc = FpcalcFingerprinter(_settings.fpcalc_path) # Gate for the network/identity tests — present in the container, absent in CI. 
requires_acoustid = pytest.mark.skipif(
    not _fpcalc.is_available() or _settings.acoustid_api_key is None,
    reason="needs the fpcalc binary + ACOUSTID_API_KEY (+ network)",
)


def _acoustid_client() -> AcoustIdHttpClient:
    """Build an AcoustID HTTP client from the module-level settings.

    The API key is unwrapped from its secret container when one is configured;
    otherwise ``None`` is passed through.
    """
    secret = _settings.acoustid_api_key
    api_key = secret.get_secret_value() if secret else None
    return AcoustIdHttpClient(
        api_key=api_key,
        user_agent=_settings.musicbrainz_user_agent,
        api_url=_settings.acoustid_api_url,
    )


# --- offline: tag reading on a real file -----------------------------------


async def test_real_file_embedded_tags_are_read() -> None:
    """Read the fixture's embedded tags and check them verbatim.

    The tags are junk, and that is the point: this pins real-file tag parsing
    end to end without touching the network.
    """
    assert FIXTURE.exists(), "fixture mp3 missing"

    reader = MutagenTagReader()
    embedded = await reader.read(FIXTURE)

    assert embedded is not None
    assert embedded.title == "Sound_13958"
    assert embedded.artist == "Music Track"
    assert embedded.album == "Музыка"
    assert embedded.genre == "Hip Hop & Rap"
    assert embedded.year == 2018
    assert embedded.duration_seconds == 143
    assert embedded.bitrate == 128


# --- networked: AcoustID identifies the real recording ---------------------


@requires_acoustid
async def test_real_file_identified_by_acoustid() -> None:
    """Fingerprint the fixture and look it up on AcoustID.

    Despite the junk tags, the lookup must come back as Scarlet Fire /
    Otis McDonald with a score at or above the configured trust threshold.
    """
    fp = await _fpcalc.calculate(FIXTURE)
    if fp is None:
        pytest.skip("fpcalc produced no fingerprint")

    client = _acoustid_client()
    hit = await client.lookup(fp)
    if hit is None:
        pytest.skip("AcoustID returned no match (network/rate limit?)")

    assert hit.score >= _settings.acoustid_trust_score
    assert hit.title == "Scarlet Fire"
    assert hit.artist == "Otis McDonald"
    assert hit.recording_mbid is not None


@requires_acoustid
async def test_real_file_enrichment_overrides_junk_tags() -> None:
    """Run the whole enrichment pipeline against the real file.

    Uses the real tag reader, fingerprinter and AcoustID client with in-memory
    repositories: the high-confidence match must win over the junk embedded
    tags, so the track ends up stored as Scarlet Fire / Otis McDonald.
    """
    pending = _pending_track()
    track_repo = _FakeTrackRepo(pending)
    artist_repo = _FakeArtistRepo()
    album_repo = _FakeAlbumRepo()

    service = MetadataEnrichmentService(
        tracks=track_repo,  # type: ignore[arg-type]
        artists=artist_repo,  # type: ignore[arg-type]
        albums=album_repo,  # type: ignore[arg-type]
        storage=_FixtureStorage(),  # type: ignore[arg-type]
        tag_reader=MutagenTagReader(),
        fingerprinter=_fpcalc,
        acoustid=_acoustid_client(),
        acoustid_trust_score=_settings.acoustid_trust_score,
    )

    outcome = await service.enrich(pending.id)
    if outcome.status == "failed":
        pytest.skip("AcoustID unavailable at run time (network/rate limit?)")
    assert outcome.status == "enriched"

    written = track_repo.applied
    assert written is not None
    assert written["title"] == "Scarlet Fire"

    assert "Otis McDonald" in artist_repo.created
    assert "Music Track" not in artist_repo.created

    # At least one album must have been created, and the first one carries
    # the identified title.
    assert album_repo.created
    first_album_title, _ = album_repo.created[0]
    assert first_album_title == "Scarlet Fire"


# --- minimal in-memory adapters --------------------------------------------


def _pending_track() -> Track:
    """Return a freshly uploaded, not-yet-enriched track for the fixture."""
    stamp = datetime.now(UTC)
    size_on_disk = FIXTURE.stat().st_size
    return Track(
        id=uuid.uuid4(),
        title="scarlet_fire_otis_mcdonald",  # the upload-time filename stem
        artist_id=uuid.uuid4(),
        album_id=None,
        storage_uri="tracks/sf/scarlet.mp3",
        file_format="mp3",
        file_size=size_on_disk,
        source="upload",
        source_id="sha-real",
        duration_seconds=None,
        genre=None,
        year=None,
        track_number=None,
        metadata_status="pending",
        metadata_error=None,
        enriched_at=None,
        availability="local",
        created_at=stamp,
        updated_at=stamp,
    )


class _FixtureStorage:
    """Storage stand-in: every key resolves to the on-disk fixture file."""

    @asynccontextmanager
    async def as_local_path(self, _key: str) -> AsyncIterator[Path]:
        """Yield the fixture path, ignoring the requested key."""
        yield FIXTURE


class _FakeTrackRepo:
    """Track repository holding one track and recording the last enrichment."""

    def __init__(self, track: Track) -> None:
        self._track = track
        # Keyword arguments of the most recent ``apply_enrichment`` call.
        self.applied: dict[str, object] | None = None

    async def get_by_id(self, _track_id: uuid.UUID) -> Track:
        """Return the single stored track whatever id is asked for."""
        return self._track

    async def apply_enrichment(self, _track_id: uuid.UUID, **kw: object) -> Track:
        """Remember the enrichment fields and hand back the stored track."""
        self.applied = kw
        return self._track


@dataclass
class _FakeArtistRepo:
    """Artist repository that logs every requested name in ``created``."""

    created: list[str] = field(default_factory=list)

    async def get_or_create(self, name: str) -> Artist:
        """Record ``name`` and return a brand-new ``Artist`` carrying it."""
        self.created.append(name)
        stamp = datetime.now(UTC)
        return Artist(
            id=uuid.uuid4(),
            name=name,
            source=None,
            source_id=None,
            created_at=stamp,
            updated_at=stamp,
        )


@dataclass
class _FakeAlbumRepo:
    """Album repository that logs ``(title, artist_id)`` pairs in ``created``."""

    created: list[tuple[str, uuid.UUID]] = field(default_factory=list)

    async def get_or_create(
        self,
        *,
        title: str,
        artist_id: uuid.UUID,
        year: int | None,
        musicbrainz_id: str | None,
    ) -> Album:
        """Record the request and return a brand-new ``Album`` built from it."""
        self.created.append((title, artist_id))
        stamp = datetime.now(UTC)
        return Album(
            id=uuid.uuid4(),
            title=title,
            artist_id=artist_id,
            year=year,
            cover_path=None,
            musicbrainz_id=musicbrainz_id,
            source=None,
            source_id=None,
            created_at=stamp,
            updated_at=stamp,
        )

    async def set_cover_path(self, _album_id: uuid.UUID, _cover_path: str) -> None:
        """Accept a cover-path update and do nothing with it."""
        return None