63c7d05eca
Adds inline AcoustID match-finding (multiple ranked candidates via
lookup_all) and PUT /tracks/{id}/metadata for manual edits, resolving
artist/album and setting metadata_status=manual. Extends TrackOut with
genre/year/track_number.
208 lines
7.1 KiB
Python
208 lines
7.1 KiB
Python
"""Enrichment tests against a real audio file (``tests/fixtures/``).
|
|
|
|
The fixture "Scarlet Fire" by Otis McDonald carries *junk* embedded tags
(``Sound_13958`` / ``Music Track`` / ``Музыка``) yet is identified by AcoustID
with ~0.99 confidence. That makes it the real-world reproduction of the
"uploaded a track, got the wrong name/artist" bug: tag reading must be exact,
and a high-confidence AcoustID match must override the junk tags.

Two layers:
- The tag-reader test is offline and deterministic — it always runs.
- The AcoustID/identity tests need the ``fpcalc`` binary, an AcoustID API key,
  and network. They *skip* (never fail) when those aren't present, honouring the
  project rule that the suite never hard-requires network. They do run inside the
  api/worker container (``make test-api``), which ships fpcalc + the key.
|
|
"""
|
|
|
|
import uuid
|
|
from collections.abc import AsyncIterator
|
|
from contextlib import asynccontextmanager
|
|
from dataclasses import dataclass, field
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from app.application.metadata_service import MetadataEnrichmentService
|
|
from app.core.config import get_settings
|
|
from app.domain.entities.album import Album
|
|
from app.domain.entities.track import Artist, Track
|
|
from app.infrastructure.metadata.acoustid import AcoustIdHttpClient
|
|
from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
|
|
from app.infrastructure.metadata.tags import MutagenTagReader
|
|
|
|
# The tests in this module are coroutines; mark the whole module for asyncio.
pytestmark = pytest.mark.asyncio

# The real MP3 under test, kept in ``fixtures/`` next to this file.
FIXTURE = Path(__file__).parent.joinpath("fixtures", "scarlet_fire_otis_mcdonald.mp3")

# Loaded once at import time; shared by the skip gate, the client factory and
# the tests below.
_settings = get_settings()
_fpcalc = FpcalcFingerprinter(_settings.fpcalc_path)
|
|
|
|
# Gate for the network/identity tests — present in the container, absent in CI.
# The key is checked for truthiness rather than ``is not None`` so the gate
# agrees with ``_acoustid_client``, which forwards ``api_key=None`` for any
# falsy key: a configured-but-blank key must skip these tests, not run them
# without credentials.
requires_acoustid = pytest.mark.skipif(
    not (_fpcalc.is_available() and bool(_settings.acoustid_api_key)),
    reason="needs the fpcalc binary + ACOUSTID_API_KEY (+ network)",
)
|
|
|
|
|
|
def _acoustid_client() -> AcoustIdHttpClient:
    """Build an AcoustID HTTP client from the module-level settings.

    The configured key is unwrapped with ``get_secret_value()``; when the key
    is missing (falsy) the client is given ``api_key=None`` instead.
    """
    secret = _settings.acoustid_api_key
    api_key = None
    if secret:
        api_key = secret.get_secret_value()
    return AcoustIdHttpClient(
        api_key=api_key,
        user_agent=_settings.musicbrainz_user_agent,
        api_url=_settings.acoustid_api_url,
    )
|
|
|
|
|
|
# --- offline: tag reading on a real file -----------------------------------
|
|
|
|
|
|
async def test_real_file_embedded_tags_are_read() -> None:
    """Offline check that the tag reader returns the fixture's embedded tags.

    The expected values are the file's actual (junk) tags, compared verbatim;
    no fingerprinting or network access is involved.
    """
    assert FIXTURE.exists(), "fixture mp3 missing"
    tags = await MutagenTagReader().read(FIXTURE)

    assert tags is not None

    # Attribute name -> value embedded in the fixture, checked in this order.
    embedded = {
        "title": "Sound_13958",
        "artist": "Music Track",
        "album": "Музыка",
        "genre": "Hip Hop & Rap",
        "year": 2018,
        "duration_seconds": 143,
        "bitrate": 128,
    }
    for attr, want in embedded.items():
        assert getattr(tags, attr) == want
|
|
|
|
|
|
# --- networked: AcoustID identifies the real recording ---------------------
|
|
|
|
|
|
@requires_acoustid
async def test_real_file_identified_by_acoustid() -> None:
    """Fingerprint the fixture with fpcalc and look it up on AcoustID.

    The match must reach the configured trust score and name the recording as
    Scarlet Fire / Otis McDonald, regardless of the junk embedded tags. A
    missing fingerprint or a missing match skips the test instead of failing.
    """
    if (fp := await _fpcalc.calculate(FIXTURE)) is None:
        pytest.skip("fpcalc produced no fingerprint")

    if (hit := await _acoustid_client().lookup(fp)) is None:
        pytest.skip("AcoustID returned no match (network/rate limit?)")

    assert hit.score >= _settings.acoustid_trust_score
    assert hit.title == "Scarlet Fire"
    assert hit.artist == "Otis McDonald"
    assert hit.recording_mbid is not None
|
|
|
|
|
|
@requires_acoustid
async def test_real_file_enrichment_overrides_junk_tags() -> None:
    """Run the whole enrichment pipeline on the fixture with real adapters.

    The tag reader, fingerprinter and AcoustID client are the real ones; the
    repositories and storage are the in-memory fakes from this module. The
    values handed to the repositories must come from the AcoustID match
    (Scarlet Fire / Otis McDonald), not from the junk embedded tags. A
    ``failed`` result skips the test instead of failing it.
    """
    pending = _pending_track()
    track_repo = _FakeTrackRepo(pending)
    artist_repo = _FakeArtistRepo()
    album_repo = _FakeAlbumRepo()

    service = MetadataEnrichmentService(
        tracks=track_repo,  # type: ignore[arg-type]
        artists=artist_repo,  # type: ignore[arg-type]
        albums=album_repo,  # type: ignore[arg-type]
        storage=_FixtureStorage(),  # type: ignore[arg-type]
        tag_reader=MutagenTagReader(),
        fingerprinter=_fpcalc,
        acoustid=_acoustid_client(),
        acoustid_trust_score=_settings.acoustid_trust_score,
    )

    outcome = await service.enrich(pending.id)
    if outcome.status == "failed":
        pytest.skip("AcoustID unavailable at run time (network/rate limit?)")

    assert outcome.status == "enriched"

    # Keyword arguments the service passed to ``apply_enrichment``.
    stored = track_repo.applied
    assert stored is not None
    assert stored["title"] == "Scarlet Fire"

    requested_artists = artist_repo.created
    assert "Otis McDonald" in requested_artists
    assert "Music Track" not in requested_artists

    # At least one album was requested, and the first carries the matched title.
    assert album_repo.created
    first_album_title = album_repo.created[0][0]
    assert first_album_title == "Scarlet Fire"
|
|
|
|
|
|
# --- minimal in-memory adapters --------------------------------------------
|
|
|
|
|
|
def _pending_track() -> Track:
    """Return a new ``Track`` whose metadata status is ``pending``.

    The title is the upload-time filename stem, the file size is read from the
    fixture on disk, and duration, genre, year and track number are left unset.
    Both timestamps share one timezone-aware "now".
    """
    stamp = datetime.now(UTC)
    track_id = uuid.uuid4()
    placeholder_artist_id = uuid.uuid4()
    size_on_disk = FIXTURE.stat().st_size
    return Track(
        id=track_id,
        title="scarlet_fire_otis_mcdonald",  # the upload-time filename stem
        artist_id=placeholder_artist_id,
        album_id=None,
        storage_uri="tracks/sf/scarlet.mp3",
        file_format="mp3",
        file_size=size_on_disk,
        source="upload",
        source_id="sha-real",
        duration_seconds=None,
        genre=None,
        year=None,
        track_number=None,
        metadata_status="pending",
        metadata_error=None,
        enriched_at=None,
        created_at=stamp,
        updated_at=stamp,
    )
|
|
|
|
|
|
class _FixtureStorage:
    """Storage stand-in whose local path is always the fixture file."""

    @asynccontextmanager
    async def as_local_path(self, _key: str) -> AsyncIterator[Path]:
        """Yield ``FIXTURE`` for any key; the key itself is ignored."""
        local_file = FIXTURE
        yield local_file
|
|
|
|
|
|
class _FakeTrackRepo:
    """Single-track in-memory repository that captures the enrichment call."""

    def __init__(self, track: Track) -> None:
        # Keyword arguments of the latest ``apply_enrichment`` call; ``None``
        # until that method has been called.
        self.applied: dict[str, object] | None = None
        self._track = track

    async def get_by_id(self, _track_id: uuid.UUID) -> Track:
        """Return the wrapped track, whatever id is asked for."""
        return self._track

    async def apply_enrichment(self, _track_id: uuid.UUID, **kw: object) -> Track:
        """Remember the keyword arguments and return the (unmodified) track."""
        self.applied = kw
        return self._track
|
|
|
|
|
|
@dataclass
class _FakeArtistRepo:
    """In-memory artist repository that logs every requested name."""

    # Names passed to ``get_or_create``, in call order.
    created: list[str] = field(default_factory=list)

    async def get_or_create(self, name: str) -> Artist:
        """Log ``name`` and return a new ``Artist`` with a random id."""
        self.created.append(name)
        stamp = datetime.now(UTC)
        return Artist(
            id=uuid.uuid4(),
            name=name,
            created_at=stamp,
            updated_at=stamp,
        )
|
|
|
|
|
|
@dataclass
class _FakeAlbumRepo:
    """In-memory album repository that logs every ``get_or_create`` request."""

    # One ``(title, artist_id)`` pair per ``get_or_create`` call, in call order.
    created: list[tuple[str, uuid.UUID]] = field(default_factory=list)

    async def get_or_create(
        self,
        *,
        title: str,
        artist_id: uuid.UUID,
        year: int | None,
        musicbrainz_id: str | None,
    ) -> Album:
        """Log the request and return a new ``Album`` built from the arguments.

        The album gets a random id, no cover path, and matching
        created/updated timestamps.
        """
        self.created.append((title, artist_id))
        stamp = datetime.now(UTC)
        return Album(
            id=uuid.uuid4(),
            title=title,
            artist_id=artist_id,
            year=year,
            cover_path=None,
            musicbrainz_id=musicbrainz_id,
            created_at=stamp,
            updated_at=stamp,
        )

    async def set_cover_path(self, _album_id: uuid.UUID, _cover_path: str) -> None:
        """Accept a cover path and discard it; the fake stores nothing."""
|