Files
mcma-backend/app/application/metadata_service.py
T
Senko-san 58b98ab5ed
Docker Build & Publish / build (push) Successful in 1m10s
Docker Build & Publish / push (push) Failing after 7s
Docker Build & Publish / Prune old image versions (push) Has been skipped
feat(library): lazy materialization foundation for remote tracks (§Phase1)
Adds nullable storage fields + availability column on tracks, remote
source/source_id identity on albums/artists, TrackRepository.materialize()
and get_or_create_remote() repos — groundwork for on-demand YTM library
(placeholders saved without audio, materialized in-place on first play).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-14 17:51:43 +03:00

307 lines
12 KiB
Python

"""MetadataEnrichmentService — the §6.2 pipeline orchestrator.

Order (tag-first): embedded tags → Chromaprint fingerprint → AcoustID lookup.
Tags fix the common well-tagged case offline; AcoustID identifies the rest and
supplies a MusicBrainz id. An AcoustID match scoring at or above
``acoustid_trust_score`` takes precedence over the tags for title, artist,
album and year. The result updates the track and sets
``metadata_status`` to ``enriched`` (identity found) or ``failed`` (nothing).

Invariants (plan §6.2, CLAUDE.md):

- **Never touch ``manual``** — a user-edited track is returned untouched.
- **Graceful degradation** — every external step is wrapped; one failure (no
  fpcalc, no API key, service down) degrades the result, never crashes.
- **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases.
"""
import tempfile
import uuid
from dataclasses import dataclass
from pathlib import Path
from app.core.logging import get_logger
from app.domain.entities.album import Album
from app.domain.entities.cover import CoverArt
from app.domain.entities.metadata import AudioTags, RecordingMatch
from app.domain.ports import (
AcoustIdClient,
AlbumRepository,
ArtistRepository,
AudioFingerprinter,
AudioTagReader,
CoverArtExtractor,
CoverArtProvider,
FileStorage,
TrackRepository,
)
# Module-level logger; call sites pass an event name plus keyword fields.
log = get_logger(__name__)
# Placeholder artist name; ``_resolve_artist`` treats it the same as a missing
# name and keeps the track's existing artist instead of creating one.
_UNKNOWN_ARTIST = "Unknown Artist"
@dataclass(frozen=True)
class EnrichmentResult:
track_id: uuid.UUID
status: str # "enriched" | "failed" | "skipped"
matched_mbid: str | None = None
class MetadataEnrichmentService:
    """Orchestrates metadata enrichment for a single stored track.

    ``enrich`` reads embedded tags, attempts an AcoustID identification, merges
    the two, resolves artist/album/cover and writes the outcome back through
    ``TrackRepository.apply_enrichment``. ``find_matches`` is the read-only
    counterpart that only lists AcoustID candidates.

    Every external step (tag read, fingerprint + lookup, cover extract / fetch /
    save) is wrapped so a failure degrades to "no data" instead of raising; the
    swallowed exception is attached to the warning event as ``error``.
    """

    def __init__(
        self,
        *,
        tracks: TrackRepository,
        artists: ArtistRepository,
        albums: AlbumRepository,
        storage: FileStorage,
        tag_reader: AudioTagReader,
        fingerprinter: AudioFingerprinter,
        acoustid: AcoustIdClient,
        cover_extractor: CoverArtExtractor | None = None,
        cover_provider: CoverArtProvider | None = None,
        acoustid_trust_score: float = 0.85,
    ) -> None:
        """Store the collaborating ports (all keyword-only).

        ``cover_extractor`` and ``cover_provider`` are optional; when one is
        ``None`` the corresponding cover source is skipped.
        ``acoustid_trust_score`` is the minimum match score at which the
        AcoustID result takes precedence over embedded tags.
        """
        self._tracks = tracks
        self._artists = artists
        self._albums = albums
        self._storage = storage
        self._tag_reader = tag_reader
        self._fingerprinter = fingerprinter
        self._acoustid = acoustid
        self._cover_extractor = cover_extractor
        self._cover_provider = cover_provider
        self._acoustid_trust_score = acoustid_trust_score

    async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult:
        """Run the enrichment pipeline for ``track_id`` and persist the result.

        Returns a result with status ``"skipped"`` when the track does not
        exist, is marked ``manual``, or has no ``storage_uri``; otherwise
        ``"enriched"`` when an artist, album or MusicBrainz id was found and
        ``"failed"`` when none was.
        """
        track = await self._tracks.get_by_id(track_id)
        if track is None:
            log.info("enrich_track_missing", track_id=str(track_id))
            return EnrichmentResult(track_id=track_id, status="skipped")
        if track.metadata_status == "manual":
            log.info("enrich_skip_manual", track_id=str(track_id))
            return EnrichmentResult(track_id=track_id, status="skipped")
        storage_uri = track.storage_uri
        if storage_uri is None:
            log.info("enrich_skip_remote", track_id=str(track_id))
            return EnrichmentResult(track_id=track_id, status="skipped")

        # NOTE(review): the tag read, the fingerprint and the cover extraction
        # each open ``as_local_path`` on their own — if that materializes a copy
        # for non-local storage it happens up to three times per run; confirm.
        tags = await self._read_local(storage_uri)
        match = await self._identify(storage_uri)

        # Merge order is tag-first by default — embedded tags fix the common
        # well-tagged offline case. But a *high-confidence* AcoustID match is the
        # more trustworthy identity (downloaded files routinely carry junk tags
        # like "Music Track"/"Sound_12345"), so above the trust threshold the
        # acoustic match wins for the identity fields and tags become fallback.
        tag_title = tags.title if tags else None
        tag_album = tags.album if tags else None
        tag_year = tags.year if tags else None
        match_title = match.title if match else None
        match_album = match.album if match else None
        match_year = match.year if match else None
        # The placeholder artist carries no identity: drop it *before* merging
        # so it can neither shadow a real name from the other source nor make
        # the run count as "identified" below.
        tag_artist = self._real_artist_name(tags.artist if tags else None)
        match_artist = self._real_artist_name(match.artist if match else None)

        trust_match = match is not None and match.score >= self._acoustid_trust_score
        if trust_match:
            title = _opt_str(match_title, tag_title) or track.title
            artist_name = _opt_str(match_artist, tag_artist)
            album_title = _opt_str(match_album, tag_album)
            year = _first_int(match_year, tag_year)
        else:
            title = _opt_str(tag_title, match_title) or track.title
            artist_name = _opt_str(tag_artist, match_artist)
            album_title = _opt_str(tag_album, match_album)
            year = _first_int(tag_year, match_year)

        # These fields only ever come from the embedded tags.
        genre = tags.genre if tags else None
        track_number = tags.track_number if tags else None
        bitrate = tags.bitrate if tags else None
        # Keep the duration already stored on the track when tags have none.
        duration = _first_int(
            tags.duration_seconds if tags else None,
            track.duration_seconds,
        )
        mbid = match.recording_mbid if match else None
        acoustid_id = match.acoustid if match else None

        artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id)
        # NOTE(review): ``mbid`` is the *recording* id, yet it is handed to the
        # album repository as the album's ``musicbrainz_id`` — confirm intended.
        album = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid)
        album_id = album.id if album is not None else None
        if album is not None:
            await self._resolve_cover(
                album,
                storage_uri=storage_uri,
                release_group_mbid=match.release_group_mbid if match else None,
            )

        identified = bool(artist_name) or album_id is not None or mbid is not None
        status = "enriched" if identified else "failed"
        # On a clean "no identity" outcome, record *why* so the UI shows a reason
        # rather than a bare "failed". A successful run clears any prior error.
        metadata_error = None if identified else self._no_match_reason()
        await self._tracks.apply_enrichment(
            track_id,
            title=title,
            artist_id=artist_id,
            album_id=album_id,
            genre=genre,
            year=year,
            track_number=track_number,
            duration_seconds=duration,
            bitrate=bitrate,
            acoustid_fingerprint=acoustid_id,
            musicbrainz_id=mbid,
            metadata_status=status,
            metadata_error=metadata_error,
        )
        log.info("enrich_complete", track_id=str(track_id), status=status, mbid=mbid)
        return EnrichmentResult(track_id=track_id, status=status, matched_mbid=mbid)

    def _no_match_reason(self) -> str:
        """Explain a ``failed`` (no-identity) run in terms a user can act on:
        which optional identification step was unavailable, if any."""
        if not self._fingerprinter.is_available():
            return "No metadata match: audio fingerprinting (fpcalc) is unavailable."
        if not self._acoustid.is_available():
            return "No metadata match: AcoustID lookup is unavailable (no API key)."
        return "No metadata match found in tags or AcoustID."

    async def find_matches(self, track_id: uuid.UUID) -> list[RecordingMatch]:
        """AcoustID candidates for the metadata editor's match picker (§A7).

        Read-only — unlike :meth:`enrich`, never touches the track. Runs
        inline (single track, user-triggered) rather than via the worker.
        Degrades to ``[]`` whenever fingerprinting/AcoustID is unavailable or
        the file can't be read, same as the enrichment pipeline.
        """
        track = await self._tracks.get_by_id(track_id)
        if track is None:
            return []
        if not self._acoustid.is_available() or not self._fingerprinter.is_available():
            return []
        if track.storage_uri is None:
            return []
        try:
            async with self._storage.as_local_path(track.storage_uri) as path:
                fingerprint = await self._fingerprinter.calculate(path)
                if fingerprint is None:
                    return []
                return await self._acoustid.lookup_all(fingerprint)
        except Exception as exc:
            # Best-effort by design, but keep the cause visible in the log.
            log.warning("find_matches_failed", track_id=str(track_id), error=repr(exc))
            return []

    async def _read_local(self, storage_uri: str) -> AudioTags | None:
        """Read embedded tags from the stored file; ``None`` if the step fails."""
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                return await self._tag_reader.read(path)
        except Exception as exc:
            log.warning("enrich_tag_step_failed", storage_uri=storage_uri, error=repr(exc))
            return None

    async def _identify(self, storage_uri: str) -> RecordingMatch | None:
        """Fingerprint the file and look it up on AcoustID.

        Returns ``None`` when either service reports itself unavailable, when
        no fingerprint could be calculated, or when the step raises.
        """
        if not self._acoustid.is_available() or not self._fingerprinter.is_available():
            return None
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                fingerprint = await self._fingerprinter.calculate(path)
                if fingerprint is None:
                    return None
                return await self._acoustid.lookup(fingerprint)
        except Exception as exc:
            log.warning("enrich_identify_step_failed", storage_uri=storage_uri, error=repr(exc))
            return None

    @staticmethod
    def _real_artist_name(name: str | None) -> str | None:
        """Return ``name``, or ``None`` when it is empty or the placeholder artist."""
        if not name or name == _UNKNOWN_ARTIST:
            return None
        return name

    async def _resolve_artist(self, name: str | None, *, fallback: uuid.UUID) -> uuid.UUID:
        """Map an artist name to an artist id, creating the artist if needed.

        An empty or placeholder name yields ``fallback`` unchanged.
        """
        real_name = self._real_artist_name(name)
        if real_name is None:
            return fallback
        artist = await self._artists.get_or_create(real_name)
        return artist.id

    async def _resolve_album(
        self,
        title: str | None,
        *,
        artist_id: uuid.UUID,
        year: int | None,
        mbid: str | None,
    ) -> Album | None:
        """Get or create the album for ``title``; ``None`` when there is no title."""
        if not title:
            return None
        return await self._albums.get_or_create(
            title=title,
            artist_id=artist_id,
            year=year,
            musicbrainz_id=mbid,
        )

    async def _resolve_cover(
        self,
        album: Album,
        *,
        storage_uri: str,
        release_group_mbid: str | None,
    ) -> None:
        """Fill in an album cover when it has none. Source order mirrors the
        tag-first pipeline: embedded artwork (offline) → Cover Art Archive
        (network, by release-group). Best-effort — any failure is swallowed so a
        missing cover never affects enrichment status."""
        if album.cover_path:
            return  # already has one — never overwrite (idempotent)
        cover = await self._extract_cover(storage_uri)
        if cover is None:
            cover = await self._fetch_cover(release_group_mbid)
        if cover is None:
            return
        try:
            key = await self._save_cover(album.id, cover)
            await self._albums.set_cover_path(album.id, key)
            log.info("cover_resolved", album_id=str(album.id), content_type=cover.content_type)
        except Exception as exc:
            log.warning("cover_save_failed", album_id=str(album.id), error=repr(exc))

    async def _extract_cover(self, storage_uri: str) -> CoverArt | None:
        """Pull embedded artwork out of the file; ``None`` without an extractor
        or when the step fails."""
        if self._cover_extractor is None:
            return None
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                return await self._cover_extractor.extract(path)
        except Exception as exc:
            log.warning("cover_extract_step_failed", storage_uri=storage_uri, error=repr(exc))
            return None

    async def _fetch_cover(self, release_group_mbid: str | None) -> CoverArt | None:
        """Fetch a cover by release-group id; ``None`` without a provider, without
        an id, when the provider is unavailable, or when the fetch fails."""
        if self._cover_provider is None or not release_group_mbid:
            return None
        if not self._cover_provider.is_available():
            return None
        try:
            return await self._cover_provider.fetch_release_group(release_group_mbid)
        except Exception as exc:
            log.warning(
                "cover_fetch_step_failed",
                release_group=release_group_mbid,
                error=repr(exc),
            )
            return None

    async def _save_cover(self, album_id: uuid.UUID, cover: CoverArt) -> str:
        """Persist ``cover`` under ``covers/<album_id>.<ext>`` and return that key."""
        key = f"covers/{album_id}.{cover.extension}"
        # The bytes go through a named temp file because ``save_file`` takes a
        # path; the file is removed again when the ``with`` block exits.
        with tempfile.NamedTemporaryFile(suffix=f".{cover.extension}") as tmp:
            tmp.write(cover.data)
            # Flush so the full payload is on disk before the path is handed over.
            tmp.flush()
            await self._storage.save_file(key, Path(tmp.name))
        return key
def _opt_str(*values: str | None) -> str | None:
for value in values:
if value:
return value
return None
def _first_int(*values: int | None) -> int | None:
for value in values:
if value is not None:
return value
return None