feat(enrichment): record status/errors and trust high-confidence AcoustID
Two related gaps surfaced from "uploaded a track, nothing changed / no status":

- A track could stay stuck on `pending` forever (an unexpected worker error rolled back the run without recording anything), and `failed` carried no reason. Add `tracks.metadata_error` + `tracks.enriched_at` (migration), stamp the outcome in apply_enrichment, add TrackRepository.mark_enrichment_failed, wrap enrich_task to persist crashes as `failed` in a fresh session, and emit a human-readable no-match reason. Expose metadata_error/enriched_at in TrackOut.
- The tag-first merge let junk embedded tags (e.g. "Music Track"/"Sound_13958") override even a 0.99-confidence AcoustID match. Add acoustid_trust_score (default 0.85): at or above it the acoustic identity wins for title/artist/album/year and tags are the fallback; below it, tag-first as before.

Add a license-free real-file fixture (Scarlet Fire / Otis McDonald) whose junk tags AcoustID overrides, with an always-on tag-reader test plus fpcalc/AcoustID/network-gated identity + full-pipeline tests (skip on host, run in the container).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -58,6 +58,7 @@ class MetadataEnrichmentService:
|
||||
acoustid: AcoustIdClient,
|
||||
cover_extractor: CoverArtExtractor | None = None,
|
||||
cover_provider: CoverArtProvider | None = None,
|
||||
acoustid_trust_score: float = 0.85,
|
||||
) -> None:
|
||||
self._tracks = tracks
|
||||
self._artists = artists
|
||||
@@ -68,6 +69,7 @@ class MetadataEnrichmentService:
|
||||
self._acoustid = acoustid
|
||||
self._cover_extractor = cover_extractor
|
||||
self._cover_provider = cover_provider
|
||||
self._acoustid_trust_score = acoustid_trust_score
|
||||
|
||||
async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult:
|
||||
track = await self._tracks.get_by_id(track_id)
|
||||
@@ -81,16 +83,31 @@ class MetadataEnrichmentService:
|
||||
tags = await self._read_local(track.storage_uri)
|
||||
match = await self._identify(track.storage_uri)
|
||||
|
||||
# Merge sources: prefer embedded tags, fall back to the AcoustID match.
|
||||
# ``title`` is guaranteed non-None by the existing track title; the rest
|
||||
# stay None when neither source has them.
|
||||
# Merge order is tag-first by default — embedded tags fix the common
|
||||
# well-tagged offline case. But a *high-confidence* AcoustID match is the
|
||||
# more trustworthy identity (downloaded files routinely carry junk tags
|
||||
# like "Music Track"/"Sound_12345"), so above the trust threshold the
|
||||
# acoustic match wins for the identity fields and tags become fallback.
|
||||
tag_title = tags.title if tags else None
|
||||
tag_artist = tags.artist if tags else None
|
||||
tag_album = tags.album if tags else None
|
||||
title = _opt_str(tag_title, match.title if match else None) or track.title
|
||||
artist_name = _opt_str(tag_artist, match.artist if match else None)
|
||||
album_title = _opt_str(tag_album, match.album if match else None)
|
||||
year = _first_int(tags.year if tags else None, match.year if match else None)
|
||||
match_title = match.title if match else None
|
||||
match_artist = match.artist if match else None
|
||||
match_album = match.album if match else None
|
||||
match_year = match.year if match else None
|
||||
tag_year = tags.year if tags else None
|
||||
trust_match = match is not None and match.score >= self._acoustid_trust_score
|
||||
|
||||
if trust_match:
|
||||
title = _opt_str(match_title, tag_title) or track.title
|
||||
artist_name = _opt_str(match_artist, tag_artist)
|
||||
album_title = _opt_str(match_album, tag_album)
|
||||
year = _first_int(match_year, tag_year)
|
||||
else:
|
||||
title = _opt_str(tag_title, match_title) or track.title
|
||||
artist_name = _opt_str(tag_artist, match_artist)
|
||||
album_title = _opt_str(tag_album, match_album)
|
||||
year = _first_int(tag_year, match_year)
|
||||
genre = tags.genre if tags else None
|
||||
track_number = tags.track_number if tags else None
|
||||
duration = _first_int(
|
||||
@@ -114,6 +131,9 @@ class MetadataEnrichmentService:
|
||||
|
||||
identified = bool(artist_name) or album_id is not None or mbid is not None
|
||||
status = "enriched" if identified else "failed"
|
||||
# On a clean "no identity" outcome, record *why* so the UI shows a reason
|
||||
# rather than a bare "failed". A successful run clears any prior error.
|
||||
metadata_error = None if identified else self._no_match_reason()
|
||||
|
||||
await self._tracks.apply_enrichment(
|
||||
track_id,
|
||||
@@ -128,10 +148,20 @@ class MetadataEnrichmentService:
|
||||
acoustid_fingerprint=acoustid_id,
|
||||
musicbrainz_id=mbid,
|
||||
metadata_status=status,
|
||||
metadata_error=metadata_error,
|
||||
)
|
||||
log.info("enrich_complete", track_id=str(track_id), status=status, mbid=mbid)
|
||||
return EnrichmentResult(track_id=track_id, status=status, matched_mbid=mbid)
|
||||
|
||||
def _no_match_reason(self) -> str:
    """Explain a ``failed`` (no-identity) run in terms a user can act on.

    Reports which optional identification step was unavailable, if any.
    The fingerprinter is checked first, then the AcoustID client; when both
    report themselves available, a generic no-match message is returned.

    Returns:
        A human-readable reason string describing why no identity was found.
    """
    # Fingerprinting comes first: it is checked ahead of the AcoustID
    # client, so its absence is reported in preference to a missing key.
    if not self._fingerprinter.is_available():
        return "No metadata match: audio fingerprinting (fpcalc) is unavailable."
    if not self._acoustid.is_available():
        return "No metadata match: AcoustID lookup is unavailable (no API key)."
    # Both steps were available, so neither source yielded an identity.
    return "No metadata match found in tags or AcoustID."
|
||||
|
||||
async def _read_local(self, storage_uri: str) -> AudioTags | None:
|
||||
try:
|
||||
async with self._storage.as_local_path(storage_uri) as path:
|
||||
|
||||
Reference in New Issue
Block a user