feat(enrichment): record status/errors and trust high-confidence AcoustID
Two related gaps surfaced from "uploaded a track, nothing changed / no status":

- A track could stay stuck on `pending` forever (an unexpected worker error rolled back the run without recording anything), and `failed` carried no reason. Add `tracks.metadata_error` + `tracks.enriched_at` (migration), stamp the outcome in apply_enrichment, add TrackRepository.mark_enrichment_failed, wrap enrich_task to persist crashes as `failed` in a fresh session, and emit a human-readable no-match reason. Expose metadata_error/enriched_at in TrackOut.
- The tag-first merge let junk embedded tags (e.g. "Music Track"/"Sound_13958") override even a 0.99-confidence AcoustID match. Add acoustid_trust_score (default 0.85): above it the acoustic identity wins for title/artist/album/year and tags are the fallback; below it, tag-first as before.

Add a license-free real-file fixture (Scarlet Fire / Otis McDonald) whose junk tags AcoustID overrides, with an always-on tag-reader test plus fpcalc/AcoustID/network-gated identity + full-pipeline tests (skip on host, run in the container).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -39,6 +39,8 @@ def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Trac
|
||||
genre=None,
|
||||
year=None,
|
||||
metadata_status=metadata_status,
|
||||
metadata_error=None,
|
||||
enriched_at=None,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
)
|
||||
@@ -251,6 +253,33 @@ async def test_nothing_found_marks_failed() -> None:
|
||||
assert applied is not None
|
||||
assert applied["artist_id"] == track.artist_id # fallback kept
|
||||
assert applied["metadata_status"] == "failed"
|
||||
# A failed run records a human-readable reason; here both id steps were
|
||||
# available, so it's the generic "no match" message.
|
||||
assert applied["metadata_error"] == "No metadata match found in tags or AcoustID."
|
||||
|
||||
|
||||
async def test_failed_reason_names_unavailable_fingerprinter() -> None:
    """A failed run built with ``fp_available=False`` mentions fingerprinting in its reason."""
    subject = _track()
    # No tags and no fingerprint are supplied, and the fake is told the
    # fingerprinter is not available.
    enricher, track_repo, _artists, _albums, _acoustid = _service(
        track=subject,
        tags=None,
        fp=None,
        fp_available=False,
    )

    outcome = await enricher.enrich(subject.id)
    assert outcome.status == "failed"

    written = track_repo.applied
    assert written is not None
    # str() keeps the membership check valid whatever type the stored reason has.
    assert "fingerprinting" in str(written["metadata_error"])
|
||||
|
||||
|
||||
async def test_successful_enrich_clears_error() -> None:
    """A run that ends as ``enriched`` writes ``metadata_error`` as ``None``."""
    subject = _track()
    artist_only_tags = AudioTags(artist="Pink Floyd")
    enricher, track_repo, _artists, _albums, _acoustid = _service(
        track=subject,
        tags=artist_only_tags,
    )

    outcome = await enricher.enrich(subject.id)
    assert outcome.status == "enriched"

    written = track_repo.applied
    assert written is not None
    assert written["metadata_error"] is None
|
||||
|
||||
|
||||
async def test_acoustid_path_fills_when_tags_absent() -> None:
|
||||
@@ -281,13 +310,14 @@ async def test_acoustid_path_fills_when_tags_absent() -> None:
|
||||
assert "Daft Punk" in artists.created
|
||||
|
||||
|
||||
async def test_tags_win_over_acoustid_for_overlapping_fields() -> None:
|
||||
async def test_tags_win_over_low_confidence_acoustid() -> None:
|
||||
track = _track()
|
||||
fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
|
||||
tags = AudioTags(title="Tagged Title", artist="Tagged Artist")
|
||||
# Below the 0.85 trust threshold → keep tag-first.
|
||||
match = RecordingMatch(
|
||||
acoustid="aid",
|
||||
score=0.9,
|
||||
score=0.5,
|
||||
recording_mbid="mbid",
|
||||
title="AcoustID Title",
|
||||
artist="AcoustID Artist",
|
||||
@@ -306,6 +336,36 @@ async def test_tags_win_over_acoustid_for_overlapping_fields() -> None:
|
||||
assert applied["musicbrainz_id"] == "mbid"
|
||||
|
||||
|
||||
async def test_high_confidence_acoustid_wins_over_junk_tags() -> None:
    """A 0.98-score AcoustID match supplies title, artist and album despite junk tags."""
    subject = _track()
    fingerprint = Fingerprint(fingerprint="AQAA", duration_seconds=200)
    # Reproduces the real-world bug: a downloaded file carries junk embedded
    # tags while the acoustic identification is near-certain, so the match
    # must decide the identity.
    junk_tags = AudioTags(title="Sound_13958", artist="Music Track", album="Музыка")
    confident_match = RecordingMatch(
        acoustid="aid",
        score=0.98,
        recording_mbid="mbid",
        release_group_mbid="rg",
        title="Scarlet Fire",
        artist="Otis McDonald",
        album="Scarlet Fire",
    )
    enricher, track_repo, artist_repo, album_repo, _acoustid = _service(
        track=subject,
        tags=junk_tags,
        fp=fingerprint,
        match=confident_match,
    )

    await enricher.enrich(subject.id)

    written = track_repo.applied
    assert written is not None
    assert written["title"] == "Scarlet Fire"
    # The artist from the match is created; the junk tag artist never is.
    assert "Otis McDonald" in artist_repo.created
    assert "Music Track" not in artist_repo.created
    # At least one album was created and the first one carries the matched name.
    assert album_repo.created
    assert album_repo.created[0][0] == "Scarlet Fire"
    assert written["metadata_status"] == "enriched"
|
||||
|
||||
|
||||
async def test_fingerprint_skipped_when_acoustid_unavailable() -> None:
|
||||
track = _track()
|
||||
fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
|
||||
@@ -356,8 +416,10 @@ async def test_cover_extracted_from_embedded_art() -> None:
|
||||
extractor = FakeCoverExtractor(_PNG)
|
||||
provider = FakeCoverProvider(_JPG)
|
||||
service, albums, storage = _cover_service(
|
||||
track=track, tags=AudioTags(album="The Wall", artist="PF"),
|
||||
extractor=extractor, provider=provider,
|
||||
track=track,
|
||||
tags=AudioTags(album="The Wall", artist="PF"),
|
||||
extractor=extractor,
|
||||
provider=provider,
|
||||
)
|
||||
|
||||
await service.enrich(track.id)
|
||||
@@ -377,8 +439,12 @@ async def test_cover_falls_back_to_archive() -> None:
|
||||
match = RecordingMatch(acoustid="ac", score=1.0, release_group_mbid="rg-123", album="The Wall")
|
||||
fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
|
||||
service, albums, storage = _cover_service(
|
||||
track=track, tags=AudioTags(album="The Wall", artist="PF"),
|
||||
match=match, fp=fp, extractor=extractor, provider=provider,
|
||||
track=track,
|
||||
tags=AudioTags(album="The Wall", artist="PF"),
|
||||
match=match,
|
||||
fp=fp,
|
||||
extractor=extractor,
|
||||
provider=provider,
|
||||
)
|
||||
|
||||
await service.enrich(track.id)
|
||||
@@ -394,8 +460,10 @@ async def test_cover_not_fetched_without_release_group() -> None:
|
||||
track = _track()
|
||||
provider = FakeCoverProvider(_JPG)
|
||||
service, albums, _ = _cover_service(
|
||||
track=track, tags=AudioTags(album="The Wall", artist="PF"),
|
||||
extractor=FakeCoverExtractor(None), provider=provider,
|
||||
track=track,
|
||||
tags=AudioTags(album="The Wall", artist="PF"),
|
||||
extractor=FakeCoverExtractor(None),
|
||||
provider=provider,
|
||||
)
|
||||
|
||||
await service.enrich(track.id)
|
||||
@@ -408,8 +476,10 @@ async def test_existing_cover_is_not_overwritten() -> None:
|
||||
track = _track()
|
||||
extractor = FakeCoverExtractor(_PNG)
|
||||
service, albums, storage = _cover_service(
|
||||
track=track, tags=AudioTags(album="The Wall", artist="PF"),
|
||||
extractor=extractor, existing_cover="covers/old.jpg",
|
||||
track=track,
|
||||
tags=AudioTags(album="The Wall", artist="PF"),
|
||||
extractor=extractor,
|
||||
existing_cover="covers/old.jpg",
|
||||
)
|
||||
|
||||
await service.enrich(track.id)
|
||||
|
||||
Reference in New Issue
Block a user