feat(enrichment): tag-first metadata pipeline (§1D)

Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint fingerprint → AcoustID lookup. Well-tagged files get correct artist/album/title offline; the rest are identified via AcoustID (which also yields a MusicBrainz recording id in one call). - domain: AudioTags/Fingerprint/RecordingMatch value objects; ports AudioTagReader, AudioFingerprinter, AcoustIdClient; TrackRepository .apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create - infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter, AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled) - application: MetadataEnrichmentService — tags preferred, AcoustID fills gaps; resolves artist/album; status enriched/failed; skips manual; every external step wrapped (graceful degradation) - workers: enrich_task registered; enqueue_enrich is best-effort and deferred so the caller's txn commits before the worker reads the row - wiring: upload enqueues after add; import returns imported_ids and enqueues post-commit (mid-scan would race the worker); manual POST /tracks/{id}/metadata/enrich endpoint - deps: add mutagen (fpcalc/ffmpeg already in the image) Tests: metadata service orchestration, AcoustID parser, tag helpers. 125 passed; mypy strict + ruff clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 13:04:02 +03:00
parent 48e3418c7f
commit c72d19599a
24 changed files with 1934 additions and 763 deletions
@@ -37,6 +37,7 @@ from app.infrastructure.db.repositories import (
 )
 from app.infrastructure.sources.registry import SourceRegistry, build_source_registry
 from app.infrastructure.storage.provider import get_file_storage
+from app.workers.queue import enqueue_enrich


 async def get_session() -> AsyncIterator[AsyncSession]:
@@ -120,6 +121,7 @@ def get_upload_service(session: SessionDep, storage: FileStorageDep) -> UploadSe
        artists=SqlAlchemyArtistRepository(session),
        storage=storage,
        tmp_dir=settings.upload_tmp_dir,
+        enqueue_enrich=enqueue_enrich,
    )


@@ -11,6 +11,7 @@ from app.api.schemas.track import TrackOut, TrackUpdate
 from app.domain.entities.album import Album
 from app.domain.entities.track import Artist, Track
 from app.domain.errors import NotFoundError
+from app.workers.queue import enqueue

 router = APIRouter(prefix="/tracks", tags=["tracks"])

@@ -147,7 +148,18 @@ async def get_track_cover(track_id: uuid.UUID, _: CurrentUser) -> Any: ...


@router.post("/{track_id}/metadata/enrich")
-async def enrich_metadata(track_id: uuid.UUID, _: CurrentUser) -> Any: ...
+async def enrich_metadata(
+    track_id: uuid.UUID,
+    track_repo: TrackRepoDep,
+    _: CurrentUser,
+) -> dict[str, str]:
+    """Re-run metadata enrichment for a track (admin/user-triggered). The work
+    happens in a worker; this only enqueues it. 503 if the queue is down."""
+    track = await track_repo.get_by_id(track_id)
+    if track is None:
+        raise NotFoundError(f"Track {track_id} not found.")
+    job_id = await enqueue("enrich_track", track_id=str(track_id))
+    return {"track_id": str(track_id), "job_id": job_id}


@router.get("/{track_id}/metadata/matches")
@@ -9,7 +9,7 @@ must not abort the whole scan (graceful degradation).

 import contextlib
 import uuid
-from dataclasses import dataclass
+from dataclasses import dataclass, field

 from app.core.logging import get_logger
 from app.domain.ports import ArtistRepository, FileStorage, IndexableSource, TrackRepository
@@ -27,6 +27,9 @@ class ImportSummary:
    imported: int
    skipped: int
    failed: int
+    # IDs of freshly imported tracks, for the caller to enqueue enrichment
+    # *after* its transaction commits (enqueuing mid-scan would race the worker).
+    imported_ids: list[uuid.UUID] = field(default_factory=list)


 class LibraryImportService:
@@ -44,7 +47,8 @@ class LibraryImportService:
    async def scan_and_import(
        self, source: IndexableSource, *, added_by: uuid.UUID | None
    ) -> ImportSummary:
-        seen = imported = skipped = failed = 0
+        seen = skipped = failed = 0
+        imported_ids: list[uuid.UUID] = []
        for file in source.scan():
            seen += 1
            try:
@@ -52,13 +56,18 @@ class LibraryImportService:
                if existing is not None:
                    skipped += 1
                    continue
-                await self._import_one(source.name, file, added_by)
-                imported += 1
+                track_id = await self._import_one(source.name, file, added_by)
+                imported_ids.append(track_id)
            except Exception:
                failed += 1
                log.warning("import_file_failed", source=source.name, source_id=file.source_id)
        summary = ImportSummary(
-            source=source.name, seen=seen, imported=imported, skipped=skipped, failed=failed
+            source=source.name,
+            seen=seen,
+            imported=len(imported_ids),
+            skipped=skipped,
+            failed=failed,
+            imported_ids=imported_ids,
        )
        log.info(
            "import_complete",
@@ -72,7 +81,7 @@ class LibraryImportService:

    async def _import_one(
        self, source_name: str, file: SourceFile, added_by: uuid.UUID | None
-    ) -> None:
+    ) -> uuid.UUID:
        track_id = uuid.uuid4()
        key = f"tracks/{str(track_id)[:2]}/{track_id}.{file.file_format}"
        await self._storage.save_file(key, file.path)
@@ -94,3 +103,4 @@ class LibraryImportService:
            with contextlib.suppress(Exception):
                await self._storage.delete(key)
            raise
+        return track_id
@@ -0,0 +1,174 @@
+"""MetadataEnrichmentService — the §6.2 pipeline orchestrator.
+
+Order (tag-first): embedded tags → Chromaprint fingerprint → AcoustID lookup.
+Tags fix the common well-tagged case offline; AcoustID identifies the rest and
+supplies a MusicBrainz id. The result updates the track and sets
+``metadata_status`` to ``enriched`` (identity found) or ``failed`` (nothing).
+
+Invariants (plan §6.2, CLAUDE.md):
+- **Never touch ``manual``** — a user-edited track is returned untouched.
+- **Graceful degradation** — every external step is wrapped; one failure (no
+  fpcalc, no API key, service down) degrades the result, never crashes.
+- **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases.
+"""
+
+import uuid
+from dataclasses import dataclass
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import AudioTags, RecordingMatch
+from app.domain.ports import (
+    AcoustIdClient,
+    AlbumRepository,
+    ArtistRepository,
+    AudioFingerprinter,
+    AudioTagReader,
+    FileStorage,
+    TrackRepository,
+)
+
+log = get_logger(__name__)
+
+_UNKNOWN_ARTIST = "Unknown Artist"
+
+
+@dataclass(frozen=True)
+class EnrichmentResult:
+    track_id: uuid.UUID
+    status: str  # "enriched" | "failed" | "skipped"
+    matched_mbid: str | None = None
+
+
+class MetadataEnrichmentService:
+    def __init__(
+        self,
+        *,
+        tracks: TrackRepository,
+        artists: ArtistRepository,
+        albums: AlbumRepository,
+        storage: FileStorage,
+        tag_reader: AudioTagReader,
+        fingerprinter: AudioFingerprinter,
+        acoustid: AcoustIdClient,
+    ) -> None:
+        self._tracks = tracks
+        self._artists = artists
+        self._albums = albums
+        self._storage = storage
+        self._tag_reader = tag_reader
+        self._fingerprinter = fingerprinter
+        self._acoustid = acoustid
+
+    async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult:
+        track = await self._tracks.get_by_id(track_id)
+        if track is None:
+            log.info("enrich_track_missing", track_id=str(track_id))
+            return EnrichmentResult(track_id=track_id, status="skipped")
+        if track.metadata_status == "manual":
+            log.info("enrich_skip_manual", track_id=str(track_id))
+            return EnrichmentResult(track_id=track_id, status="skipped")
+
+        tags = await self._read_local(track.storage_uri)
+        match = await self._identify(track.storage_uri)
+
+        # Merge sources: prefer embedded tags, fall back to the AcoustID match.
+        # ``title`` is guaranteed non-None by the existing track title; the rest
+        # stay None when neither source has them.
+        tag_title = tags.title if tags else None
+        tag_artist = tags.artist if tags else None
+        tag_album = tags.album if tags else None
+        title = _opt_str(tag_title, match.title if match else None) or track.title
+        artist_name = _opt_str(tag_artist, match.artist if match else None)
+        album_title = _opt_str(tag_album, match.album if match else None)
+        year = _first_int(tags.year if tags else None, match.year if match else None)
+        genre = tags.genre if tags else None
+        track_number = tags.track_number if tags else None
+        duration = _first_int(
+            tags.duration_seconds if tags else None,
+            track.duration_seconds,
+        )
+        bitrate = tags.bitrate if tags else None
+        mbid = match.recording_mbid if match else None
+        acoustid_id = match.acoustid if match else None
+
+        artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id)
+        album_id = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid)
+
+        identified = bool(artist_name) or album_id is not None or mbid is not None
+        status = "enriched" if identified else "failed"
+
+        await self._tracks.apply_enrichment(
+            track_id,
+            title=title,
+            artist_id=artist_id,
+            album_id=album_id,
+            genre=genre,
+            year=year,
+            track_number=track_number,
+            duration_seconds=duration,
+            bitrate=bitrate,
+            acoustid_fingerprint=acoustid_id,
+            musicbrainz_id=mbid,
+            metadata_status=status,
+        )
+        log.info("enrich_complete", track_id=str(track_id), status=status, mbid=mbid)
+        return EnrichmentResult(track_id=track_id, status=status, matched_mbid=mbid)
+
+    async def _read_local(self, storage_uri: str) -> AudioTags | None:
+        try:
+            async with self._storage.as_local_path(storage_uri) as path:
+                return await self._tag_reader.read(path)
+        except Exception:
+            log.warning("enrich_tag_step_failed", storage_uri=storage_uri)
+            return None
+
+    async def _identify(self, storage_uri: str) -> RecordingMatch | None:
+        if not self._acoustid.is_available() or not self._fingerprinter.is_available():
+            return None
+        try:
+            async with self._storage.as_local_path(storage_uri) as path:
+                fingerprint = await self._fingerprinter.calculate(path)
+            if fingerprint is None:
+                return None
+            return await self._acoustid.lookup(fingerprint)
+        except Exception:
+            log.warning("enrich_identify_step_failed", storage_uri=storage_uri)
+            return None
+
+    async def _resolve_artist(self, name: str | None, *, fallback: uuid.UUID) -> uuid.UUID:
+        if not name or name == _UNKNOWN_ARTIST:
+            return fallback
+        artist = await self._artists.get_or_create(name)
+        return artist.id
+
+    async def _resolve_album(
+        self,
+        title: str | None,
+        *,
+        artist_id: uuid.UUID,
+        year: int | None,
+        mbid: str | None,
+    ) -> uuid.UUID | None:
+        if not title:
+            return None
+        album = await self._albums.get_or_create(
+            title=title,
+            artist_id=artist_id,
+            year=year,
+            musicbrainz_id=mbid,
+        )
+        return album.id
+
+
+def _opt_str(*values: str | None) -> str | None:
+    for value in values:
+        if value:
+            return value
+    return None
+
+
+def _first_int(*values: int | None) -> int | None:
+    for value in values:
+        if value is not None:
+            return value
+    return None
@@ -5,6 +5,7 @@ import hashlib
 import os
 import tempfile
 import uuid
+from collections.abc import Awaitable, Callable
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Protocol
@@ -14,6 +15,8 @@ import anyio
 from app.domain.entities.user import User
 from app.domain.ports import ArtistRepository, FileStorage, TrackRepository

+EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
+

 class UploadFileProtocol(Protocol):
    filename: str | None
@@ -49,11 +52,13 @@ class UploadService:
        artists: ArtistRepository,
        storage: FileStorage,
        tmp_dir: Path | None = None,
+        enqueue_enrich: EnrichEnqueuer | None = None,
    ) -> None:
        self._tracks = tracks
        self._artists = artists
        self._storage = storage
        self._tmp_dir = tmp_dir
+        self._enqueue_enrich = enqueue_enrich

    async def handle_upload(
        self,
@@ -105,7 +110,8 @@ class UploadService:
                    await self._storage.delete(key)
                raise

-            # TODO(1D): enqueue metadata enrichment task
+            if self._enqueue_enrich is not None:
+                await self._enqueue_enrich(track.id)

            return UploadResult(
                track_id=track.id,
@@ -73,9 +73,15 @@ class Settings(BaseSettings):
    # -- external services (all optional; graceful degradation) ----------
    ml_service_url: str | None = None
    acoustid_api_key: SecretStr | None = None
+    acoustid_api_url: str = "https://api.acoustid.org/v2/lookup"
    musicbrainz_user_agent: str = "mcma-backend/0.1.0 ( https://github.com/your/repo )"
    youtube_cookies_path: Path | None = None

+    # -- enrichment -------------------------------------------------------
+    # ``fpcalc`` (Chromaprint) binary; resolved on PATH by default. The Docker
+    # image installs it via libchromaprint-tools.
+    fpcalc_path: str = "fpcalc"
+
    @field_validator("database_url")
    @classmethod
    def _require_async_driver(cls, v: str) -> str:
@@ -3,6 +3,7 @@
 from app.domain.entities.album import Album
 from app.domain.entities.history import PlayHistoryEntry
 from app.domain.entities.like import Like
+from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
 from app.domain.entities.playlist import Playlist
 from app.domain.entities.storage import ObjectStat
 from app.domain.entities.track import Artist, Track
@@ -11,11 +12,14 @@ from app.domain.entities.user import Credentials, SubsonicCredentials, User
 __all__ = [
    "Album",
    "Artist",
+    "AudioTags",
    "Credentials",
+    "Fingerprint",
    "Like",
    "ObjectStat",
    "PlayHistoryEntry",
    "Playlist",
+    "RecordingMatch",
    "SubsonicCredentials",
    "Track",
    "User",
@@ -0,0 +1,53 @@
+"""Value objects for the metadata-enrichment pipeline (plan §6.2).
+
+Pure data carriers between the enrichment service and its adapters (tag reader,
+fingerprinter, AcoustID). No framework imports — these cross the domain boundary.
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, slots=True)
+class AudioTags:
+    """Embedded tags read from the file itself (ID3 / Vorbis / MP4 …).
+
+    Every field is optional — files are tagged inconsistently. The reader fills
+    what it can and leaves the rest ``None`` for downstream identification.
+    """
+
+    title: str | None = None
+    artist: str | None = None
+    album: str | None = None
+    album_artist: str | None = None
+    genre: str | None = None
+    year: int | None = None
+    track_number: int | None = None
+    duration_seconds: int | None = None
+    bitrate: int | None = None
+
+
+@dataclass(frozen=True, slots=True)
+class Fingerprint:
+    """Chromaprint fingerprint plus the decoded duration (both needed by AcoustID)."""
+
+    fingerprint: str
+    duration_seconds: int
+
+
+@dataclass(frozen=True, slots=True)
+class RecordingMatch:
+    """A single AcoustID result, flattened to the fields enrichment cares about.
+
+    ``acoustid`` is the stable AcoustID identifier (a UUID) — used as the
+    dedup key persisted on ``track.acoustid_fingerprint`` (fits the 64-char
+    column; the raw fingerprint does not). ``recording_mbid`` is the MusicBrainz
+    recording id when present.
+    """
+
+    acoustid: str
+    score: float
+    recording_mbid: str | None = None
+    title: str | None = None
+    artist: str | None = None
+    album: str | None = None
+    year: int | None = None
@@ -14,11 +14,14 @@ from typing import Protocol

 from app.domain.entities import (
    Album,
+    AudioTags,
    Credentials,
+    Fingerprint,
    Like,
    ObjectStat,
    PlayHistoryEntry,
    Playlist,
+    RecordingMatch,
    SubsonicCredentials,
    User,
 )
@@ -153,9 +156,38 @@ class TrackRepository(Protocol):
        genre: str | None,
        year: int | None,
    ) -> Track: ...
+    async def apply_enrichment(
+        self,
+        track_id: uuid.UUID,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        album_id: uuid.UUID | None,
+        genre: str | None,
+        year: int | None,
+        track_number: int | None,
+        duration_seconds: int | None,
+        bitrate: int | None,
+        acoustid_fingerprint: str | None,
+        musicbrainz_id: str | None,
+        metadata_status: str,
+    ) -> Track:
+        """Persist auto-enrichment results. Nullable fields are filled only when
+        a non-``None`` value is supplied (re-enrich never erases prior data);
+        ``title``/``artist_id``/``metadata_status`` are always written. Callers
+        must not invoke this for ``metadata_status == 'manual'`` tracks."""
+        ...


 class AlbumRepository(Protocol):
+    async def get_or_create(
+        self,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        year: int | None,
+        musicbrainz_id: str | None,
+    ) -> Album: ...
    async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ...
    async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ...
    async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ...
@@ -240,3 +272,28 @@ class IndexableSource(SourceBackend, Protocol):
    """A source that enumerates files already on disk (e.g. the local folder)."""

    def scan(self) -> Iterator[SourceFile]: ...
+
+
+# -- metadata enrichment (plan §6.2) -----------------------------------------
+class AudioTagReader(Protocol):
+    """Reads embedded tags from a local audio file. Returns ``None`` only when
+    the file can't be parsed at all — never raises (graceful degradation)."""
+
+    async def read(self, path: Path) -> AudioTags | None: ...
+
+
+class AudioFingerprinter(Protocol):
+    """Chromaprint (fpcalc) wrapper. ``is_available`` reflects whether the
+    binary is present; ``calculate`` returns ``None`` on any failure."""
+
+    def is_available(self) -> bool: ...
+    async def calculate(self, path: Path) -> Fingerprint | None: ...
+
+
+class AcoustIdClient(Protocol):
+    """AcoustID lookup. ``is_available`` is False without an API key (the whole
+    fingerprint path is then skipped). ``lookup`` returns the best match or
+    ``None`` (no result / service down), never raising."""
+
+    def is_available(self) -> bool: ...
+    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: ...
@@ -27,6 +27,42 @@ class SqlAlchemyAlbumRepository:
    def __init__(self, session: AsyncSession) -> None:
        self._session = session

+    async def get_or_create(
+        self,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        year: int | None,
+        musicbrainz_id: str | None,
+    ) -> Album:
+        """Resolve an album by ``(title, artist_id)``, creating it if absent.
+        Backfills ``year``/``musicbrainz_id`` onto an existing row when it lacks
+        them and enrichment now has values (gap-fill, never overwrite)."""
+        row = (
+            await self._session.execute(
+                select(AlbumModel).where(
+                    AlbumModel.title == title,
+                    AlbumModel.artist_id == artist_id,
+                )
+            )
+        ).scalar_one_or_none()
+        if row is None:
+            row = AlbumModel(
+                title=title,
+                artist_id=artist_id,
+                year=year,
+                musicbrainz_id=musicbrainz_id,
+            )
+            self._session.add(row)
+        else:
+            if row.year is None and year is not None:
+                row.year = year
+            if row.musicbrainz_id is None and musicbrainz_id is not None:
+                row.musicbrainz_id = musicbrainz_id
+        await self._session.flush()
+        await self._session.refresh(row)
+        return _to_entity(row)
+
    async def get_by_id(self, album_id: uuid.UUID) -> Album | None:
        row = await self._session.get(AlbumModel, album_id)
        return _to_entity(row) if row is not None else None
@@ -173,3 +173,47 @@ class SqlAlchemyTrackRepository:
        await self._session.flush()
        await self._session.refresh(row)
        return _to_entity(row)
+
+    async def apply_enrichment(
+        self,
+        track_id: uuid.UUID,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        album_id: uuid.UUID | None,
+        genre: str | None,
+        year: int | None,
+        track_number: int | None,
+        duration_seconds: int | None,
+        bitrate: int | None,
+        acoustid_fingerprint: str | None,
+        musicbrainz_id: str | None,
+        metadata_status: str,
+    ) -> Track:
+        row = await self._session.get(TrackModel, track_id)
+        if row is None:
+            raise NotFoundError(f"Track {track_id} not found.")
+        # Identity + status are authoritative for an enrichment run.
+        row.title = title
+        row.artist_id = artist_id
+        row.metadata_status = metadata_status
+        # Nullable extras: fill gaps only — never erase data a prior run found.
+        if album_id is not None:
+            row.album_id = album_id
+        if genre is not None:
+            row.genre = genre
+        if year is not None:
+            row.year = year
+        if track_number is not None:
+            row.track_number = track_number
+        if duration_seconds is not None:
+            row.duration_seconds = duration_seconds
+        if bitrate is not None:
+            row.bitrate = bitrate
+        if acoustid_fingerprint is not None:
+            row.acoustid_fingerprint = acoustid_fingerprint
+        if musicbrainz_id is not None:
+            row.musicbrainz_id = musicbrainz_id
+        await self._session.flush()
+        await self._session.refresh(row)
+        return _to_entity(row)
@@ -0,0 +1 @@
+"""Metadata-enrichment adapters: tag reader, fingerprinter, AcoustID client."""
@@ -0,0 +1,129 @@
+"""AcoustIdHttpClient — identifies a recording from its Chromaprint fingerprint.
+
+One ``/v2/lookup`` call with ``meta=recordings+releasegroups`` returns the
+AcoustID id, the MusicBrainz recording id, and canonical title/artist/album —
+metadata that itself originates from MusicBrainz, so a separate MB call is not
+needed for Phase 1 (plan §6.2 steps 2-3 collapsed into one request).
+
+Graceful degradation: no API key → ``is_available()`` is False and the whole
+fingerprint path is skipped; any network/parse error → ``lookup`` returns
+``None``. A small inter-call delay keeps us within AcoustID's rate limit.
+"""
+
+import asyncio
+import time
+
+import httpx
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import Fingerprint, RecordingMatch
+
+log = get_logger(__name__)
+
+_DEFAULT_URL = "https://api.acoustid.org/v2/lookup"
+_TIMEOUT_SECONDS = 10.0
+_MIN_INTERVAL_SECONDS = 0.34  # AcoustID allows ~3 req/s; stay polite
+
+
+class AcoustIdHttpClient:
+    """Implements :class:`app.domain.ports.AcoustIdClient`."""
+
+    _throttle_lock = asyncio.Lock()
+    _last_call_monotonic = 0.0
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None,
+        user_agent: str,
+        api_url: str = _DEFAULT_URL,
+    ) -> None:
+        self._api_key = api_key
+        self._user_agent = user_agent
+        self._api_url = api_url
+
+    def is_available(self) -> bool:
+        return bool(self._api_key)
+
+    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
+        if not self._api_key:
+            return None
+        try:
+            await self._throttle()
+            async with httpx.AsyncClient(
+                timeout=_TIMEOUT_SECONDS,
+                headers={"User-Agent": self._user_agent},
+            ) as client:
+                resp = await client.get(
+                    self._api_url,
+                    params={
+                        "client": self._api_key,
+                        "duration": str(fingerprint.duration_seconds),
+                        "fingerprint": fingerprint.fingerprint,
+                        "meta": "recordings releasegroups",
+                        "format": "json",
+                    },
+                )
+            resp.raise_for_status()
+            payload = resp.json()
+        except (httpx.HTTPError, ValueError):
+            log.warning("acoustid_lookup_failed")
+            return None
+
+        return _parse_best_match(payload)
+
+    @classmethod
+    async def _throttle(cls) -> None:
+        async with cls._throttle_lock:
+            elapsed = time.monotonic() - cls._last_call_monotonic
+            wait = _MIN_INTERVAL_SECONDS - elapsed
+            if wait > 0:
+                await asyncio.sleep(wait)
+            cls._last_call_monotonic = time.monotonic()
+
+
+def _parse_best_match(payload: object) -> RecordingMatch | None:
+    if not isinstance(payload, dict) or payload.get("status") != "ok":
+        return None
+    results = payload.get("results")
+    if not isinstance(results, list) or not results:
+        return None
+
+    # Results are returned best-score-first; take the top scoring one.
+    best = max(results, key=lambda r: r.get("score", 0.0) if isinstance(r, dict) else 0.0)
+    if not isinstance(best, dict):
+        return None
+
+    acoustid = best.get("id")
+    if not isinstance(acoustid, str):
+        return None
+    score = float(best.get("score", 0.0))
+
+    recording_mbid: str | None = None
+    title: str | None = None
+    artist: str | None = None
+    album: str | None = None
+
+    recordings = best.get("recordings")
+    if isinstance(recordings, list) and recordings and isinstance(recordings[0], dict):
+        rec = recordings[0]
+        recording_mbid = rec.get("id") if isinstance(rec.get("id"), str) else None
+        title = rec.get("title") if isinstance(rec.get("title"), str) else None
+        artists = rec.get("artists")
+        if isinstance(artists, list) and artists and isinstance(artists[0], dict):
+            name = artists[0].get("name")
+            artist = name if isinstance(name, str) else None
+        groups = rec.get("releasegroups")
+        if isinstance(groups, list) and groups and isinstance(groups[0], dict):
+            gtitle = groups[0].get("title")
+            album = gtitle if isinstance(gtitle, str) else None
+
+    return RecordingMatch(
+        acoustid=acoustid,
+        score=score,
+        recording_mbid=recording_mbid,
+        title=title,
+        artist=artist,
+        album=album,
+        year=None,
+    )
@@ -0,0 +1,62 @@
+"""FpcalcFingerprinter — Chromaprint fingerprint via the ``fpcalc`` binary.
+
+``fpcalc -json <file>`` emits ``{"duration": float, "fingerprint": str}``. The
+binary ships in the Docker image (``libchromaprint-tools``). Any failure (binary
+missing, bad file, timeout) degrades to ``None`` — the pipeline then falls back
+to tag-only metadata (plan §6.2: one external dependency must never crash it).
+"""
+
+import asyncio
+import json
+import shutil
+from pathlib import Path
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import Fingerprint
+
+log = get_logger(__name__)
+
+_TIMEOUT_SECONDS = 30
+
+
+class FpcalcFingerprinter:
+    """Implements :class:`app.domain.ports.AudioFingerprinter`."""
+
+    def __init__(self, binary: str = "fpcalc") -> None:
+        self._binary = binary
+
+    def is_available(self) -> bool:
+        return shutil.which(self._binary) is not None
+
+    async def calculate(self, path: Path) -> Fingerprint | None:
+        if not self.is_available():
+            return None
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                self._binary,
+                "-json",
+                str(path),
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            async with asyncio.timeout(_TIMEOUT_SECONDS):
+                stdout, _stderr = await proc.communicate()
+        except (TimeoutError, OSError):
+            log.warning("fpcalc_failed", path=str(path))
+            return None
+
+        if proc.returncode != 0:
+            log.warning("fpcalc_nonzero", path=str(path), returncode=proc.returncode)
+            return None
+
+        try:
+            data = json.loads(stdout)
+            fingerprint = str(data["fingerprint"])
+            duration = round(float(data["duration"]))
+        except (json.JSONDecodeError, KeyError, ValueError):
+            log.warning("fpcalc_bad_output", path=str(path))
+            return None
+
+        if not fingerprint or duration <= 0:
+            return None
+        return Fingerprint(fingerprint=fingerprint, duration_seconds=duration)
@@ -0,0 +1,88 @@
+"""MutagenTagReader — reads embedded tags from a local audio file.
+
+The offline first pass of enrichment (plan §6.2): well-tagged files get correct
+artist/album/title without any network call. mutagen's ``easy=True`` mode
+normalises tag keys across ID3 / Vorbis / MP4, so one code path covers all the
+formats the library accepts. Parsing is blocking, so it runs in a worker thread.
+"""
+
+import re
+from pathlib import Path
+
+import anyio
+from mutagen import File as MutagenFile  # type: ignore[attr-defined]
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import AudioTags
+
+log = get_logger(__name__)
+
+_YEAR_RE = re.compile(r"(\d{4})")
+
+
+def _first(value: object) -> str | None:
+    """EasyXxx tags expose values as lists; take the first non-empty string."""
+    if isinstance(value, list):
+        value = value[0] if value else None
+    if value is None:
+        return None
+    text = str(value).strip()
+    return text or None
+
+
+def _parse_year(value: object) -> int | None:
+    text = _first(value)
+    if text is None:
+        return None
+    m = _YEAR_RE.search(text)
+    return int(m.group(1)) if m else None
+
+
+def _parse_track_number(value: object) -> int | None:
+    text = _first(value)
+    if text is None:
+        return None
+    # "3" or "3/12" → 3
+    head = text.split("/", 1)[0].strip()
+    return int(head) if head.isdigit() else None
+
+
+class MutagenTagReader:
+    """Implements :class:`app.domain.ports.AudioTagReader`."""
+
+    async def read(self, path: Path) -> AudioTags | None:
+        try:
+            return await anyio.to_thread.run_sync(self._read_sync, path)
+        except Exception:
+            log.warning("tag_read_failed", path=str(path))
+            return None
+
+    def _read_sync(self, path: Path) -> AudioTags | None:
+        audio = MutagenFile(str(path), easy=True)
+        if audio is None:
+            return None  # unrecognised container
+
+        tags = audio.tags or {}
+        info = getattr(audio, "info", None)
+
+        duration = None
+        bitrate = None
+        if info is not None:
+            length = getattr(info, "length", None)
+            if length:
+                duration = round(float(length))
+            raw_bitrate = getattr(info, "bitrate", None)
+            if raw_bitrate:
+                bitrate = int(raw_bitrate) // 1000  # bits/s → kbps for display
+
+        return AudioTags(
+            title=_first(tags.get("title")),
+            artist=_first(tags.get("artist")),
+            album=_first(tags.get("album")),
+            album_artist=_first(tags.get("albumartist")),
+            genre=_first(tags.get("genre")),
+            year=_parse_year(tags.get("date") or tags.get("year")),
+            track_number=_parse_track_number(tags.get("tracknumber")),
+            duration_seconds=duration,
+            bitrate=bitrate,
+        )
@@ -1,7 +1,7 @@
 """arq worker settings — the queue runtime. Task functions register here.

 Run with: ``arq app.workers.arq_worker.WorkerSettings``.
-Tasks (download, enrich, transcode) are appended to ``functions`` in later steps.
+Tasks (download, transcode) are appended to ``functions`` in later steps.
 """

 from typing import Any, ClassVar
@@ -10,6 +10,7 @@ from arq.connections import RedisSettings

 from app.core.config import get_settings
 from app.core.logging import configure_logging, get_logger
+from app.workers.tasks.enrich_task import enrich_track
 from app.workers.tasks.import_task import scan_local_folder

 log = get_logger("worker")
@@ -26,7 +27,7 @@ async def shutdown(_ctx: dict[str, Any]) -> None:


 class WorkerSettings:
-    functions: ClassVar[list[Any]] = [scan_local_folder]
+    functions: ClassVar[list[Any]] = [scan_local_folder, enrich_track]
    on_startup = startup
    on_shutdown = shutdown
    max_jobs = get_settings().max_parallel_downloads
@@ -4,14 +4,18 @@ A short-lived pool per call keeps things simple (enqueues are rare, admin-driven
 actions). Redis being down degrades to a clean 503 rather than a crash
 (graceful degradation)."""

+import uuid
 from typing import Any

 from arq import create_pool
 from arq.connections import RedisSettings

 from app.core.config import get_settings
+from app.core.logging import get_logger
 from app.domain.errors import DependencyUnavailableError

+log = get_logger("worker.queue")
+

 async def enqueue(function: str, **kwargs: Any) -> str:
    """Enqueue ``function`` by name, returning the job id. Raises
@@ -28,3 +32,18 @@ async def enqueue(function: str, **kwargs: Any) -> str:
    if job is None:
        raise DependencyUnavailableError("Could not enqueue job.")
    return str(job.job_id)
+
+
+async def enqueue_enrich(track_id: uuid.UUID) -> None:
+    """Best-effort enqueue of metadata enrichment for a freshly stored track.
+
+    The track is already persisted, so enrichment is a follow-up, not a barrier:
+    if the queue is unreachable we log and move on (graceful degradation). The
+    track stays ``metadata_status=pending`` and can be re-enriched later.
+
+    Deferred a few seconds so the caller's DB transaction is committed before the
+    worker looks the track up (the upload request commits only after it returns)."""
+    try:
+        await enqueue("enrich_track", track_id=str(track_id), _defer_by=5)
+    except DependencyUnavailableError:
+        log.warning("enrich_enqueue_failed", track_id=str(track_id))
@@ -0,0 +1,56 @@
+"""arq task: enrich one track's metadata (plan §6.2, §1D).
+
+Wires the §6.2 pipeline adapters to :class:`MetadataEnrichmentService` and runs
+it in the worker's own transactional session. Enqueued (deferred) after upload
+and after a local-folder import. Idempotent and best-effort — a missing track or
+a ``manual`` one is a clean no-op.
+"""
+
+import uuid
+from typing import Any
+
+from app.application.metadata_service import MetadataEnrichmentService
+from app.core.config import get_settings
+from app.core.logging import get_logger
+from app.infrastructure.db import session_scope
+from app.infrastructure.db.repositories import (
+    SqlAlchemyAlbumRepository,
+    SqlAlchemyArtistRepository,
+    SqlAlchemyTrackRepository,
+)
+from app.infrastructure.metadata.acoustid import AcoustIdHttpClient
+from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
+from app.infrastructure.metadata.tags import MutagenTagReader
+from app.infrastructure.storage.provider import get_file_storage
+
+log = get_logger("worker.enrich")
+
+
+async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any]:
+    settings = get_settings()
+    api_key = (
+        settings.acoustid_api_key.get_secret_value() if settings.acoustid_api_key else None
+    )
+    acoustid = AcoustIdHttpClient(
+        api_key=api_key,
+        user_agent=settings.musicbrainz_user_agent,
+        api_url=settings.acoustid_api_url,
+    )
+
+    async with session_scope() as session:
+        service = MetadataEnrichmentService(
+            tracks=SqlAlchemyTrackRepository(session),
+            artists=SqlAlchemyArtistRepository(session),
+            albums=SqlAlchemyAlbumRepository(session),
+            storage=get_file_storage(),
+            tag_reader=MutagenTagReader(),
+            fingerprinter=FpcalcFingerprinter(settings.fpcalc_path),
+            acoustid=acoustid,
+        )
+        result = await service.enrich(uuid.UUID(track_id))
+
+    return {
+        "track_id": str(result.track_id),
+        "status": result.status,
+        "mbid": result.matched_mbid,
+    }
@@ -18,6 +18,7 @@ from app.infrastructure.db.repositories import (
 )
 from app.infrastructure.sources.registry import build_source_registry
 from app.infrastructure.storage.provider import get_file_storage
+from app.workers.queue import enqueue_enrich

 log = get_logger("worker.import")

@@ -37,6 +38,11 @@ async def scan_local_folder(
        )
        summary = await service.scan_and_import(backend, added_by=actor)

+    # Enqueue enrichment only after the import transaction has committed above,
+    # so the enrich worker is guaranteed to see the new rows.
+    for track_id in summary.imported_ids:
+        await enqueue_enrich(track_id)
+
    return {
        "source": summary.source,
        "seen": summary.seen,
				`@@ -0,0 +1 @@`
				`"""Metadata-enrichment adapters: tag reader, fingerprinter, AcoustID client."""`