feat(enrichment): tag-first metadata pipeline (§1D)
Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint
fingerprint → AcoustID lookup. Well-tagged files get correct
artist/album/title offline; the rest are identified via AcoustID
(which also yields a MusicBrainz recording id in one call).
- domain: AudioTags/Fingerprint/RecordingMatch value objects; ports
AudioTagReader, AudioFingerprinter, AcoustIdClient;
TrackRepository.apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create
- infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter,
AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled)
- application: MetadataEnrichmentService — tags preferred, AcoustID fills
gaps; resolves artist/album; status enriched/failed; skips manual;
every external step wrapped (graceful degradation)
- workers: enrich_task registered; enqueue_enrich is best-effort and
deferred so the caller's txn commits before the worker reads the row
- wiring: upload enqueues after add; import returns imported_ids and
enqueues post-commit (mid-scan would race the worker); manual
POST /tracks/{id}/metadata/enrich endpoint
- deps: add mutagen (fpcalc/ffmpeg already in the image)
Tests: metadata service orchestration, AcoustID parser, tag helpers.
125 passed; mypy strict + ruff clean.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -14,11 +14,14 @@ from typing import Protocol
|
||||
|
||||
from app.domain.entities import (
|
||||
Album,
|
||||
AudioTags,
|
||||
Credentials,
|
||||
Fingerprint,
|
||||
Like,
|
||||
ObjectStat,
|
||||
PlayHistoryEntry,
|
||||
Playlist,
|
||||
RecordingMatch,
|
||||
SubsonicCredentials,
|
||||
User,
|
||||
)
|
||||
@@ -153,9 +156,38 @@ class TrackRepository(Protocol):
|
||||
genre: str | None,
|
||||
year: int | None,
|
||||
) -> Track: ...
|
||||
async def apply_enrichment(
    self,
    track_id: uuid.UUID,
    *,
    title: str,
    artist_id: uuid.UUID,
    album_id: uuid.UUID | None,
    genre: str | None,
    year: int | None,
    track_number: int | None,
    duration_seconds: int | None,
    bitrate: int | None,
    acoustid_fingerprint: str | None,
    musicbrainz_id: str | None,
    metadata_status: str,
) -> Track:
    """Store the outcome of an automatic enrichment pass for one track.

    Write semantics are gap-fill: every parameter typed ``... | None``
    (``album_id``, ``genre``, ``year``, ``track_number``,
    ``duration_seconds``, ``bitrate``, ``acoustid_fingerprint``,
    ``musicbrainz_id``) is written only when the supplied value is not
    ``None``, so re-running enrichment never erases data stored earlier.
    ``title``, ``artist_id`` and ``metadata_status`` are written on every
    call.

    Precondition: the track's ``metadata_status`` is not ``'manual'`` —
    callers are responsible for skipping such tracks before calling.
    """
    ...
|
||||
|
||||
|
||||
class AlbumRepository(Protocol):
|
||||
async def get_or_create(
    self,
    *,
    title: str,
    artist_id: uuid.UUID,
    year: int | None,
    musicbrainz_id: str | None,
) -> Album:
    """Return an ``Album`` for the given attributes, creating it if needed.

    NOTE(review): which attributes identify an existing album (presumably
    ``title`` + ``artist_id``) and how ``year`` / ``musicbrainz_id`` are
    applied to an already-existing row is decided by the implementation —
    confirm there before relying on either.
    """
    ...
|
||||
async def get_by_id(
    self,
    album_id: uuid.UUID,
) -> Album | None:
    """Look up a single album by ``album_id``.

    NOTE(review): ``None`` presumably means no such album exists — confirm
    against the implementation.
    """
    ...
|
||||
async def get_many(
    self,
    ids: list[uuid.UUID],
) -> list[Album]:
    """Fetch the albums for ``ids`` in one call.

    NOTE(review): result ordering and the treatment of unknown ids are not
    specified here — check the implementation before relying on either.
    """
    ...
|
||||
async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ...
|
||||
@@ -240,3 +272,28 @@ class IndexableSource(SourceBackend, Protocol):
|
||||
"""A source that enumerates files already on disk (e.g. the local folder)."""
|
||||
|
||||
def scan(self) -> Iterator[SourceFile]:
    """Iterate over the source's files, one ``SourceFile`` per entry."""
    ...
|
||||
|
||||
|
||||
# -- metadata enrichment (plan §6.2) -----------------------------------------
|
||||
class AudioTagReader(Protocol):
    """Port for extracting the tags embedded in a local audio file.

    Implementations degrade gracefully: they must not raise, and they
    return ``None`` only when the file cannot be parsed at all.
    """

    async def read(self, path: Path) -> AudioTags | None:
        """Return the embedded tags of ``path``, or ``None`` if unparseable."""
        ...
|
||||
|
||||
|
||||
class AudioFingerprinter(Protocol):
    """Port wrapping Chromaprint's ``fpcalc`` binary."""

    def is_available(self) -> bool:
        """Report whether the ``fpcalc`` binary is present."""
        ...

    async def calculate(self, path: Path) -> Fingerprint | None:
        """Fingerprint the file at ``path``; ``None`` signals any failure."""
        ...
|
||||
|
||||
|
||||
class AcoustIdClient(Protocol):
    """Port for AcoustID lookups."""

    def is_available(self) -> bool:
        """Report whether lookups are possible.

        ``False`` when no API key is configured, in which case the whole
        fingerprint path is skipped.
        """
        ...

    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
        """Return the best match for ``fingerprint``.

        Never raises: ``None`` covers both "no result" and "service down".
        """
        ...
|
||||
|
||||
Reference in New Issue
Block a user