73d7da440f
Two related gaps surfaced from "uploaded a track, nothing changed / no status": - A track could stay stuck on `pending` forever (an unexpected worker error rolled back the run without recording anything), and `failed` carried no reason. Add `tracks.metadata_error` + `tracks.enriched_at` (migration), stamp the outcome in apply_enrichment, add TrackRepository.mark_enrichment_failed, wrap enrich_task to persist crashes as `failed` in a fresh session, and emit a human-readable no-match reason. Expose metadata_error/enriched_at in TrackOut. - The tag-first merge let junk embedded tags (e.g. "Music Track"/"Sound_13958") override even a 0.99-confidence AcoustID match. Add acoustid_trust_score (default 0.85): at or above it the acoustic identity wins for title/artist/album/year and tags are the fallback; below it, tag-first as before. Add a license-free real-file fixture (Scarlet Fire / Otis McDonald) whose junk tags AcoustID overrides, with an always-on tag-reader test plus fpcalc/AcoustID/network-gated identity + full-pipeline tests (skipped on the host, run in the container). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
278 lines
11 KiB
Python
278 lines
11 KiB
Python
"""MetadataEnrichmentService — the §6.2 pipeline orchestrator.

Order (tag-first): embedded tags → Chromaprint fingerprint → AcoustID lookup.
Tags fix the common well-tagged case offline; AcoustID identifies the rest and
supplies a MusicBrainz id. When the AcoustID match scores at or above
``acoustid_trust_score`` it overrides the tags for title/artist/album/year and
the tags become the fallback. The result updates the track and sets
``metadata_status`` to ``enriched`` (identity found) or ``failed`` (nothing).

Invariants (plan §6.2, CLAUDE.md):
- **Never touch ``manual``** — a user-edited track is returned untouched.
- **Graceful degradation** — every external step is wrapped; one failure (no
  fpcalc, no API key, service down) degrades the result, never crashes.
- **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases.
"""
|
|
|
|
import tempfile
|
|
import uuid
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from app.core.logging import get_logger
|
|
from app.domain.entities.album import Album
|
|
from app.domain.entities.cover import CoverArt
|
|
from app.domain.entities.metadata import AudioTags, RecordingMatch
|
|
from app.domain.ports import (
|
|
AcoustIdClient,
|
|
AlbumRepository,
|
|
ArtistRepository,
|
|
AudioFingerprinter,
|
|
AudioTagReader,
|
|
CoverArtExtractor,
|
|
CoverArtProvider,
|
|
FileStorage,
|
|
TrackRepository,
|
|
)
|
|
|
|
# Module-level logger; events are emitted as an event name plus keyword fields.
log = get_logger(__name__)

# Placeholder artist name: ``_resolve_artist`` treats it like a missing name and
# keeps the track's current artist instead of looking up an artist for it.
_UNKNOWN_ARTIST = "Unknown Artist"
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class EnrichmentResult:
|
|
track_id: uuid.UUID
|
|
status: str # "enriched" | "failed" | "skipped"
|
|
matched_mbid: str | None = None
|
|
|
|
|
|
class MetadataEnrichmentService:
    """Orchestrate metadata enrichment for a single track.

    A run reads the embedded tags, optionally identifies the recording through
    a fingerprint + AcoustID lookup, merges both sources, resolves the artist,
    album and album cover, and persists the outcome through the track
    repository. Each external step is wrapped so that a failing dependency
    degrades the result instead of raising.
    """

    def __init__(
        self,
        *,
        tracks: TrackRepository,
        artists: ArtistRepository,
        albums: AlbumRepository,
        storage: FileStorage,
        tag_reader: AudioTagReader,
        fingerprinter: AudioFingerprinter,
        acoustid: AcoustIdClient,
        cover_extractor: CoverArtExtractor | None = None,
        cover_provider: CoverArtProvider | None = None,
        acoustid_trust_score: float = 0.85,
    ) -> None:
        """Wire the service to its collaborators (all keyword-only).

        Args:
            tracks: Track lookup and persistence of the enrichment outcome.
            artists: Artist ``get_or_create`` by name.
            albums: Album ``get_or_create`` and cover-path updates.
            storage: Materialises stored audio as a local path and saves covers.
            tag_reader: Reads embedded tags from a local audio file.
            fingerprinter: Computes the fingerprint used for the AcoustID lookup.
            acoustid: Looks a fingerprint up and returns a recording match.
            cover_extractor: Optional source of embedded artwork.
            cover_provider: Optional network source of artwork, queried by
                release-group MBID.
            acoustid_trust_score: Match score at or above which the AcoustID
                match overrides embedded tags for title/artist/album/year.
        """
        self._tracks = tracks
        self._artists = artists
        self._albums = albums
        self._storage = storage
        self._tag_reader = tag_reader
        self._fingerprinter = fingerprinter
        self._acoustid = acoustid
        self._cover_extractor = cover_extractor
        self._cover_provider = cover_provider
        self._acoustid_trust_score = acoustid_trust_score

    async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult:
        """Enrich one track and persist the outcome.

        Args:
            track_id: Id of the track to enrich.

        Returns:
            An ``EnrichmentResult`` whose status is ``"skipped"`` when the track
            does not exist or is marked ``manual`` (nothing is written in that
            case), ``"enriched"`` when an artist, album or MusicBrainz id was
            found, and ``"failed"`` otherwise.
        """
        track = await self._tracks.get_by_id(track_id)
        if track is None:
            log.info("enrich_track_missing", track_id=str(track_id))
            return EnrichmentResult(track_id=track_id, status="skipped")
        if track.metadata_status == "manual":
            # User-edited metadata is never overwritten.
            log.info("enrich_skip_manual", track_id=str(track_id))
            return EnrichmentResult(track_id=track_id, status="skipped")

        tags = await self._read_local(track.storage_uri)
        match = await self._identify(track.storage_uri)

        # Merge order is tag-first by default — embedded tags fix the common
        # well-tagged offline case. But a *high-confidence* AcoustID match is the
        # more trustworthy identity (downloaded files routinely carry junk tags
        # like "Music Track"/"Sound_12345"), so at or above the trust threshold
        # the acoustic match wins for the identity fields and tags become the
        # fallback.
        tag_title = tags.title if tags else None
        tag_artist = tags.artist if tags else None
        tag_album = tags.album if tags else None
        tag_year = tags.year if tags else None
        match_title = match.title if match else None
        match_artist = match.artist if match else None
        match_album = match.album if match else None
        match_year = match.year if match else None
        trust_match = match is not None and match.score >= self._acoustid_trust_score

        if trust_match:
            title = _opt_str(match_title, tag_title) or track.title
            artist_name = _opt_str(match_artist, tag_artist)
            album_title = _opt_str(match_album, tag_album)
            year = _first_int(match_year, tag_year)
        else:
            title = _opt_str(tag_title, match_title) or track.title
            artist_name = _opt_str(tag_artist, match_artist)
            album_title = _opt_str(tag_album, match_album)
            year = _first_int(tag_year, match_year)

        # These fields only ever come from the embedded tags (duration falls
        # back to the value already stored on the track).
        genre = tags.genre if tags else None
        track_number = tags.track_number if tags else None
        duration = _first_int(
            tags.duration_seconds if tags else None,
            track.duration_seconds,
        )
        bitrate = tags.bitrate if tags else None
        mbid = match.recording_mbid if match else None
        acoustid_id = match.acoustid if match else None

        artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id)
        # NOTE(review): ``mbid`` is the *recording* MBID, yet it is handed to the
        # album as its ``musicbrainz_id`` — confirm AlbumRepository expects that
        # rather than the release-group MBID carried by the match.
        album = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid)
        album_id = album.id if album is not None else None

        if album is not None:
            await self._resolve_cover(
                album,
                storage_uri=track.storage_uri,
                release_group_mbid=match.release_group_mbid if match else None,
            )

        # The placeholder artist name is ignored by ``_resolve_artist``, so it
        # must not count as a found identity here either.
        has_artist = bool(artist_name) and artist_name != _UNKNOWN_ARTIST
        identified = has_artist or album_id is not None or mbid is not None
        status = "enriched" if identified else "failed"
        # On a clean "no identity" outcome, record *why* so the UI shows a reason
        # rather than a bare "failed". A successful run clears any prior error.
        metadata_error = None if identified else self._no_match_reason()

        await self._tracks.apply_enrichment(
            track_id,
            title=title,
            artist_id=artist_id,
            album_id=album_id,
            genre=genre,
            year=year,
            track_number=track_number,
            duration_seconds=duration,
            bitrate=bitrate,
            acoustid_fingerprint=acoustid_id,
            musicbrainz_id=mbid,
            metadata_status=status,
            metadata_error=metadata_error,
        )
        log.info("enrich_complete", track_id=str(track_id), status=status, mbid=mbid)
        return EnrichmentResult(track_id=track_id, status=status, matched_mbid=mbid)

    def _no_match_reason(self) -> str:
        """Explain a ``failed`` (no-identity) run in terms a user can act on:
        which optional identification step was unavailable, if any."""
        if not self._fingerprinter.is_available():
            return "No metadata match: audio fingerprinting (fpcalc) is unavailable."
        if not self._acoustid.is_available():
            return "No metadata match: AcoustID lookup is unavailable (no API key)."
        return "No metadata match found in tags or AcoustID."

    async def _read_local(self, storage_uri: str) -> AudioTags | None:
        """Read the embedded tags of the stored file.

        Returns ``None`` when materialising or reading the file raises; the
        error is logged and never propagated.
        """
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                return await self._tag_reader.read(path)
        except Exception as exc:
            # Best-effort step: keep the cause in the log so it can be diagnosed.
            log.warning("enrich_tag_step_failed", storage_uri=storage_uri, error=repr(exc))
            return None

    async def _identify(self, storage_uri: str) -> RecordingMatch | None:
        """Fingerprint the stored file and look it up on AcoustID.

        Returns ``None`` when either dependency reports itself unavailable, when
        no fingerprint could be calculated, or when any step raises (logged,
        never propagated).
        """
        if not self._acoustid.is_available() or not self._fingerprinter.is_available():
            return None
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                fingerprint = await self._fingerprinter.calculate(path)
                if fingerprint is None:
                    return None
                return await self._acoustid.lookup(fingerprint)
        except Exception as exc:
            log.warning("enrich_identify_step_failed", storage_uri=storage_uri, error=repr(exc))
            return None

    async def _resolve_artist(self, name: str | None, *, fallback: uuid.UUID) -> uuid.UUID:
        """Return the id of the artist called ``name``, or ``fallback`` when the
        name is missing or is the "Unknown Artist" placeholder."""
        if not name or name == _UNKNOWN_ARTIST:
            return fallback
        artist = await self._artists.get_or_create(name)
        return artist.id

    async def _resolve_album(
        self,
        title: str | None,
        *,
        artist_id: uuid.UUID,
        year: int | None,
        mbid: str | None,
    ) -> Album | None:
        """Get or create the album for ``title``; ``None`` when there is no title."""
        if not title:
            return None
        return await self._albums.get_or_create(
            title=title,
            artist_id=artist_id,
            year=year,
            musicbrainz_id=mbid,
        )

    async def _resolve_cover(
        self,
        album: Album,
        *,
        storage_uri: str,
        release_group_mbid: str | None,
    ) -> None:
        """Fill in an album cover when it has none. Source order mirrors the
        tag-first pipeline: embedded artwork (offline) → Cover Art Archive
        (network, by release-group). Best-effort — any failure is swallowed so a
        missing cover never affects enrichment status."""
        if album.cover_path:
            return  # already has one — never overwrite (idempotent)

        cover = await self._extract_cover(storage_uri)
        if cover is None:
            cover = await self._fetch_cover(release_group_mbid)
        if cover is None:
            return

        try:
            key = await self._save_cover(album.id, cover)
            await self._albums.set_cover_path(album.id, key)
            log.info("cover_resolved", album_id=str(album.id), content_type=cover.content_type)
        except Exception as exc:
            log.warning("cover_save_failed", album_id=str(album.id), error=repr(exc))

    async def _extract_cover(self, storage_uri: str) -> CoverArt | None:
        """Extract embedded artwork from the stored file.

        Returns ``None`` when no extractor is configured or when extraction
        raises (logged, never propagated).
        """
        if self._cover_extractor is None:
            return None
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                return await self._cover_extractor.extract(path)
        except Exception as exc:
            log.warning("cover_extract_step_failed", storage_uri=storage_uri, error=repr(exc))
            return None

    async def _fetch_cover(self, release_group_mbid: str | None) -> CoverArt | None:
        """Fetch artwork for a release group from the cover provider.

        Returns ``None`` when no provider is configured, no release-group MBID
        is known, the provider reports itself unavailable, or the fetch raises
        (logged, never propagated).
        """
        if self._cover_provider is None or not release_group_mbid:
            return None
        if not self._cover_provider.is_available():
            return None
        try:
            return await self._cover_provider.fetch_release_group(release_group_mbid)
        except Exception as exc:
            log.warning(
                "cover_fetch_step_failed",
                release_group=release_group_mbid,
                error=repr(exc),
            )
            return None

    async def _save_cover(self, album_id: uuid.UUID, cover: CoverArt) -> str:
        """Persist ``cover`` under ``covers/<album_id>.<extension>`` and return
        that storage key.

        The bytes are staged in a file inside a temporary directory that is
        closed before storage reads it. Handing out the name of a still-open
        ``NamedTemporaryFile`` is not portable (it cannot be re-opened by name
        on Windows), and the directory cleanup also tolerates a ``save_file``
        that moves the staged file away.
        """
        key = f"covers/{album_id}.{cover.extension}"
        with tempfile.TemporaryDirectory() as staging_dir:
            staged = Path(staging_dir) / f"cover.{cover.extension}"
            staged.write_bytes(cover.data)
            await self._storage.save_file(key, staged)
        return key
|
|
|
|
|
|
def _opt_str(*values: str | None) -> str | None:
|
|
for value in values:
|
|
if value:
|
|
return value
|
|
return None
|
|
|
|
|
|
def _first_int(*values: int | None) -> int | None:
|
|
for value in values:
|
|
if value is not None:
|
|
return value
|
|
return None
|