feat(enrichment): record status/errors and trust high-confidence AcoustID
Docker Build & Publish / build (push) Has been cancelled
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled

Two related gaps surfaced from "uploaded a track, nothing changed / no status":

- A track could stay stuck on `pending` forever (an unexpected worker error
  rolled back the run without recording anything), and `failed` carried no
  reason. Add `tracks.metadata_error` + `tracks.enriched_at` (migration), stamp
  the outcome in apply_enrichment, add TrackRepository.mark_enrichment_failed,
  wrap enrich_task to persist crashes as `failed` in a fresh session, and emit a
  human-readable no-match reason. Expose metadata_error/enriched_at in TrackOut.

- The tag-first merge let junk embedded tags (e.g. "Music Track"/"Sound_13958")
  override even a 0.99-confidence AcoustID match. Add acoustid_trust_score
  (default 0.85): above it, the acoustic identity wins for title/artist/album/
  year and tags are only a fallback; below it, the merge stays tag-first as
  before.

Add a license-free real-file fixture (Scarlet Fire / Otis McDonald) whose junk
embedded tags are overridden by the AcoustID match. It comes with an always-on
tag-reader test plus identity and full-pipeline tests that are gated on fpcalc,
AcoustID, and network availability (skipped on the host, run in the container).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-13 13:29:08 +03:00
parent 30cb8901f2
commit 73d7da440f
17 changed files with 468 additions and 33 deletions
+80 -10
View File
@@ -39,6 +39,8 @@ def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Trac
genre=None,
year=None,
metadata_status=metadata_status,
metadata_error=None,
enriched_at=None,
created_at=now,
updated_at=now,
)
@@ -251,6 +253,33 @@ async def test_nothing_found_marks_failed() -> None:
assert applied is not None
assert applied["artist_id"] == track.artist_id # fallback kept
assert applied["metadata_status"] == "failed"
# A failed run records a human-readable reason; here both id steps were
# available, so it's the generic "no match" message.
assert applied["metadata_error"] == "No metadata match found in tags or AcoustID."
async def test_failed_reason_names_unavailable_fingerprinter() -> None:
    """A failed run without a fingerprinter mentions fingerprinting in its reason.

    The service is built with no tags, no fingerprint, and
    ``fp_available=False``; the applied ``metadata_error`` must then contain
    the word "fingerprinting".
    """
    pending = _track()
    service, track_repo, _artists, _albums, _acoustid = _service(
        track=pending,
        tags=None,
        fp=None,
        fp_available=False,
    )

    outcome = await service.enrich(pending.id)

    assert outcome.status == "failed"
    payload = track_repo.applied
    assert payload is not None
    # str() keeps the membership check valid whatever type the reason has.
    assert "fingerprinting" in str(payload["metadata_error"])
async def test_successful_enrich_clears_error() -> None:
    """An enriched run applies ``metadata_error`` as ``None``."""
    pending = _track()
    artist_only_tags = AudioTags(artist="Pink Floyd")
    service, track_repo, _artists, _albums, _acoustid = _service(
        track=pending,
        tags=artist_only_tags,
    )

    outcome = await service.enrich(pending.id)

    assert outcome.status == "enriched"
    payload = track_repo.applied
    assert payload is not None
    assert payload["metadata_error"] is None
async def test_acoustid_path_fills_when_tags_absent() -> None:
@@ -281,13 +310,14 @@ async def test_acoustid_path_fills_when_tags_absent() -> None:
assert "Daft Punk" in artists.created
async def test_tags_win_over_acoustid_for_overlapping_fields() -> None:
async def test_tags_win_over_low_confidence_acoustid() -> None:
track = _track()
fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
tags = AudioTags(title="Tagged Title", artist="Tagged Artist")
# Below the 0.85 trust threshold → keep tag-first.
match = RecordingMatch(
acoustid="aid",
score=0.9,
score=0.5,
recording_mbid="mbid",
title="AcoustID Title",
artist="AcoustID Artist",
@@ -306,6 +336,36 @@ async def test_tags_win_over_acoustid_for_overlapping_fields() -> None:
assert applied["musicbrainz_id"] == "mbid"
async def test_high_confidence_acoustid_wins_over_junk_tags() -> None:
    """A 0.98-score AcoustID match overrides junk embedded tags.

    Reproduces the real-world bug: a downloaded file carries meaningless
    embedded tags while the acoustic identification is near-certain. The
    applied title, created artist, and created album must come from the match,
    not from the tags.
    """
    pending = _track()
    fingerprint = Fingerprint(fingerprint="AQAA", duration_seconds=200)
    junk_tags = AudioTags(title="Sound_13958", artist="Music Track", album="Музыка")
    confident_match = RecordingMatch(
        acoustid="aid",
        score=0.98,
        recording_mbid="mbid",
        release_group_mbid="rg",
        title="Scarlet Fire",
        artist="Otis McDonald",
        album="Scarlet Fire",
    )
    service, track_repo, artist_repo, album_repo, _acoustid = _service(
        track=pending,
        tags=junk_tags,
        fp=fingerprint,
        match=confident_match,
    )

    await service.enrich(pending.id)

    payload = track_repo.applied
    assert payload is not None
    assert payload["title"] == "Scarlet Fire"
    # The artist from the match is created; the junk tag artist never is.
    assert "Otis McDonald" in artist_repo.created
    assert "Music Track" not in artist_repo.created
    # An album must have been created, and the first one carries the match name.
    assert album_repo.created
    assert album_repo.created[0][0] == "Scarlet Fire"
    assert payload["metadata_status"] == "enriched"
async def test_fingerprint_skipped_when_acoustid_unavailable() -> None:
track = _track()
fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
@@ -356,8 +416,10 @@ async def test_cover_extracted_from_embedded_art() -> None:
extractor = FakeCoverExtractor(_PNG)
provider = FakeCoverProvider(_JPG)
service, albums, storage = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
extractor=extractor, provider=provider,
track=track,
tags=AudioTags(album="The Wall", artist="PF"),
extractor=extractor,
provider=provider,
)
await service.enrich(track.id)
@@ -377,8 +439,12 @@ async def test_cover_falls_back_to_archive() -> None:
match = RecordingMatch(acoustid="ac", score=1.0, release_group_mbid="rg-123", album="The Wall")
fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
service, albums, storage = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
match=match, fp=fp, extractor=extractor, provider=provider,
track=track,
tags=AudioTags(album="The Wall", artist="PF"),
match=match,
fp=fp,
extractor=extractor,
provider=provider,
)
await service.enrich(track.id)
@@ -394,8 +460,10 @@ async def test_cover_not_fetched_without_release_group() -> None:
track = _track()
provider = FakeCoverProvider(_JPG)
service, albums, _ = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
extractor=FakeCoverExtractor(None), provider=provider,
track=track,
tags=AudioTags(album="The Wall", artist="PF"),
extractor=FakeCoverExtractor(None),
provider=provider,
)
await service.enrich(track.id)
@@ -408,8 +476,10 @@ async def test_existing_cover_is_not_overwritten() -> None:
track = _track()
extractor = FakeCoverExtractor(_PNG)
service, albums, storage = _cover_service(
track=track, tags=AudioTags(album="The Wall", artist="PF"),
extractor=extractor, existing_cover="covers/old.jpg",
track=track,
tags=AudioTags(album="The Wall", artist="PF"),
extractor=extractor,
existing_cover="covers/old.jpg",
)
await service.enrich(track.id)