feat(enrichment): tag-first metadata pipeline (§1D)

Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint fingerprint → AcoustID lookup. Well-tagged files get correct artist/album/title offline; the rest are identified via AcoustID (which also yields a MusicBrainz recording id in one call).

- domain: AudioTags/Fingerprint/RecordingMatch value objects; ports AudioTagReader, AudioFingerprinter, AcoustIdClient; TrackRepository.apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create
- infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter, AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled)
- application: MetadataEnrichmentService — tags preferred, AcoustID fills gaps; resolves artist/album; status enriched/failed; skips manual; every external step wrapped (graceful degradation)
- workers: enrich_task registered; enqueue_enrich is best-effort and deferred so the caller's txn commits before the worker reads the row
- wiring: upload enqueues after add; import returns imported_ids and enqueues post-commit (mid-scan would race the worker); manual POST /tracks/{id}/metadata/enrich endpoint
- deps: add mutagen (fpcalc/ffmpeg already in the image)

Tests: metadata service orchestration, AcoustID parser, tag helpers. 125 passed; mypy strict + ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 13:04:02 +03:00
parent 48e3418c7f
commit c72d19599a
24 changed files with 1934 additions and 763 deletions
@@ -37,6 +37,7 @@ from app.infrastructure.db.repositories import (
 )
 from app.infrastructure.sources.registry import SourceRegistry, build_source_registry
 from app.infrastructure.storage.provider import get_file_storage
+from app.workers.queue import enqueue_enrich


 async def get_session() -> AsyncIterator[AsyncSession]:
@@ -120,6 +121,7 @@ def get_upload_service(session: SessionDep, storage: FileStorageDep) -> UploadSe
        artists=SqlAlchemyArtistRepository(session),
        storage=storage,
        tmp_dir=settings.upload_tmp_dir,
+        enqueue_enrich=enqueue_enrich,
    )


@@ -11,6 +11,7 @@ from app.api.schemas.track import TrackOut, TrackUpdate
 from app.domain.entities.album import Album
 from app.domain.entities.track import Artist, Track
 from app.domain.errors import NotFoundError
+from app.workers.queue import enqueue

 router = APIRouter(prefix="/tracks", tags=["tracks"])

@@ -147,7 +148,18 @@ async def get_track_cover(track_id: uuid.UUID, _: CurrentUser) -> Any: ...


@router.post("/{track_id}/metadata/enrich")
-async def enrich_metadata(track_id: uuid.UUID, _: CurrentUser) -> Any: ...
+async def enrich_metadata(
+    track_id: uuid.UUID,
+    track_repo: TrackRepoDep,
+    _: CurrentUser,
+) -> dict[str, str]:
+    """Re-run metadata enrichment for a track (admin/user-triggered). The work
+    happens in a worker; this only enqueues it. 503 if the queue is down."""
+    track = await track_repo.get_by_id(track_id)
+    if track is None:
+        raise NotFoundError(f"Track {track_id} not found.")
+    job_id = await enqueue("enrich_track", track_id=str(track_id))
+    return {"track_id": str(track_id), "job_id": job_id}


@router.get("/{track_id}/metadata/matches")
@@ -9,7 +9,7 @@ must not abort the whole scan (graceful degradation).

 import contextlib
 import uuid
-from dataclasses import dataclass
+from dataclasses import dataclass, field

 from app.core.logging import get_logger
 from app.domain.ports import ArtistRepository, FileStorage, IndexableSource, TrackRepository
@@ -27,6 +27,9 @@ class ImportSummary:
    imported: int
    skipped: int
    failed: int
+    # IDs of freshly imported tracks, for the caller to enqueue enrichment
+    # *after* its transaction commits (enqueuing mid-scan would race the worker).
+    imported_ids: list[uuid.UUID] = field(default_factory=list)


 class LibraryImportService:
@@ -44,7 +47,8 @@ class LibraryImportService:
    async def scan_and_import(
        self, source: IndexableSource, *, added_by: uuid.UUID | None
    ) -> ImportSummary:
-        seen = imported = skipped = failed = 0
+        seen = skipped = failed = 0
+        imported_ids: list[uuid.UUID] = []
        for file in source.scan():
            seen += 1
            try:
@@ -52,13 +56,18 @@ class LibraryImportService:
                if existing is not None:
                    skipped += 1
                    continue
-                await self._import_one(source.name, file, added_by)
-                imported += 1
+                track_id = await self._import_one(source.name, file, added_by)
+                imported_ids.append(track_id)
            except Exception:
                failed += 1
                log.warning("import_file_failed", source=source.name, source_id=file.source_id)
        summary = ImportSummary(
-            source=source.name, seen=seen, imported=imported, skipped=skipped, failed=failed
+            source=source.name,
+            seen=seen,
+            imported=len(imported_ids),
+            skipped=skipped,
+            failed=failed,
+            imported_ids=imported_ids,
        )
        log.info(
            "import_complete",
@@ -72,7 +81,7 @@ class LibraryImportService:

    async def _import_one(
        self, source_name: str, file: SourceFile, added_by: uuid.UUID | None
-    ) -> None:
+    ) -> uuid.UUID:
        track_id = uuid.uuid4()
        key = f"tracks/{str(track_id)[:2]}/{track_id}.{file.file_format}"
        await self._storage.save_file(key, file.path)
@@ -94,3 +103,4 @@ class LibraryImportService:
            with contextlib.suppress(Exception):
                await self._storage.delete(key)
            raise
+        return track_id
@@ -0,0 +1,174 @@
+"""MetadataEnrichmentService — the §6.2 pipeline orchestrator.
+
+Order (tag-first): embedded tags → Chromaprint fingerprint → AcoustID lookup.
+Tags fix the common well-tagged case offline; AcoustID identifies the rest and
+supplies a MusicBrainz id. The result updates the track and sets
+``metadata_status`` to ``enriched`` (identity found) or ``failed`` (nothing).
+
+Invariants (plan §6.2, CLAUDE.md):
+- **Never touch ``manual``** — a user-edited track is returned untouched.
+- **Graceful degradation** — every external step is wrapped; one failure (no
+  fpcalc, no API key, service down) degrades the result, never crashes.
+- **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases.
+"""
+
+import uuid
+from dataclasses import dataclass
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import AudioTags, RecordingMatch
+from app.domain.ports import (
+    AcoustIdClient,
+    AlbumRepository,
+    ArtistRepository,
+    AudioFingerprinter,
+    AudioTagReader,
+    FileStorage,
+    TrackRepository,
+)
+
+log = get_logger(__name__)
+
+_UNKNOWN_ARTIST = "Unknown Artist"
+
+
+@dataclass(frozen=True)
+class EnrichmentResult:
+    track_id: uuid.UUID
+    status: str  # "enriched" | "failed" | "skipped"
+    matched_mbid: str | None = None  # MusicBrainz recording id from the AcoustID match, if any
+
+
+class MetadataEnrichmentService:
+    def __init__(
+        self,
+        *,
+        tracks: TrackRepository,
+        artists: ArtistRepository,
+        albums: AlbumRepository,
+        storage: FileStorage,
+        tag_reader: AudioTagReader,
+        fingerprinter: AudioFingerprinter,
+        acoustid: AcoustIdClient,
+    ) -> None:
+        self._tracks = tracks
+        self._artists = artists
+        self._albums = albums
+        self._storage = storage
+        self._tag_reader = tag_reader
+        self._fingerprinter = fingerprinter
+        self._acoustid = acoustid
+
+    async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult:
+        track = await self._tracks.get_by_id(track_id)
+        if track is None:
+            log.info("enrich_track_missing", track_id=str(track_id))
+            return EnrichmentResult(track_id=track_id, status="skipped")
+        if track.metadata_status == "manual":
+            log.info("enrich_skip_manual", track_id=str(track_id))
+            return EnrichmentResult(track_id=track_id, status="skipped")
+
+        tags = await self._read_local(track.storage_uri)
+        match = await self._identify(track.storage_uri)
+
+        # Merge sources: prefer embedded tags, fall back to the AcoustID match.
+        # ``title`` is guaranteed non-None by the existing track title; the rest
+        # stay None when neither source has them.
+        tag_title = tags.title if tags else None
+        tag_artist = tags.artist if tags else None
+        tag_album = tags.album if tags else None
+        title = _opt_str(tag_title, match.title if match else None) or track.title
+        artist_name = _opt_str(tag_artist, match.artist if match else None)
+        album_title = _opt_str(tag_album, match.album if match else None)
+        year = _first_int(tags.year if tags else None, match.year if match else None)
+        genre = tags.genre if tags else None
+        track_number = tags.track_number if tags else None
+        duration = _first_int(
+            tags.duration_seconds if tags else None,
+            track.duration_seconds,
+        )
+        bitrate = tags.bitrate if tags else None
+        mbid = match.recording_mbid if match else None
+        acoustid_id = match.acoustid if match else None
+
+        artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id)
+        album_id = await self._resolve_album(album_title, artist_id=artist_id, year=year, mbid=mbid)
+
+        identified = bool(artist_name) or album_id is not None or mbid is not None
+        status = "enriched" if identified else "failed"
+
+        await self._tracks.apply_enrichment(
+            track_id,
+            title=title,
+            artist_id=artist_id,
+            album_id=album_id,
+            genre=genre,
+            year=year,
+            track_number=track_number,
+            duration_seconds=duration,
+            bitrate=bitrate,
+            acoustid_fingerprint=acoustid_id,
+            musicbrainz_id=mbid,
+            metadata_status=status,
+        )
+        log.info("enrich_complete", track_id=str(track_id), status=status, mbid=mbid)
+        return EnrichmentResult(track_id=track_id, status=status, matched_mbid=mbid)
+
+    async def _read_local(self, storage_uri: str) -> AudioTags | None:
+        try:
+            async with self._storage.as_local_path(storage_uri) as path:
+                return await self._tag_reader.read(path)
+        except Exception:
+            log.warning("enrich_tag_step_failed", storage_uri=storage_uri)
+            return None
+
+    async def _identify(self, storage_uri: str) -> RecordingMatch | None:
+        if not self._acoustid.is_available() or not self._fingerprinter.is_available():
+            return None
+        try:
+            async with self._storage.as_local_path(storage_uri) as path:
+                fingerprint = await self._fingerprinter.calculate(path)
+            if fingerprint is None:
+                return None
+            return await self._acoustid.lookup(fingerprint)
+        except Exception:
+            log.warning("enrich_identify_step_failed", storage_uri=storage_uri)
+            return None
+
+    async def _resolve_artist(self, name: str | None, *, fallback: uuid.UUID) -> uuid.UUID:
+        if not name or name == _UNKNOWN_ARTIST:
+            return fallback
+        artist = await self._artists.get_or_create(name)
+        return artist.id
+
+    async def _resolve_album(
+        self,
+        title: str | None,
+        *,
+        artist_id: uuid.UUID,
+        year: int | None,
+        mbid: str | None,
+    ) -> uuid.UUID | None:
+        if not title:
+            return None
+        album = await self._albums.get_or_create(
+            title=title,
+            artist_id=artist_id,
+            year=year,
+            musicbrainz_id=mbid,
+        )
+        return album.id
+
+
+def _opt_str(*values: str | None) -> str | None:
+    for value in values:
+        if value:
+            return value
+    return None
+
+
+def _first_int(*values: int | None) -> int | None:
+    for value in values:
+        if value is not None:
+            return value
+    return None
@@ -5,6 +5,7 @@ import hashlib
 import os
 import tempfile
 import uuid
+from collections.abc import Awaitable, Callable
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Protocol
@@ -14,6 +15,8 @@ import anyio
 from app.domain.entities.user import User
 from app.domain.ports import ArtistRepository, FileStorage, TrackRepository

+EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
+

 class UploadFileProtocol(Protocol):
    filename: str | None
@@ -49,11 +52,13 @@ class UploadService:
        artists: ArtistRepository,
        storage: FileStorage,
        tmp_dir: Path | None = None,
+        enqueue_enrich: EnrichEnqueuer | None = None,
    ) -> None:
        self._tracks = tracks
        self._artists = artists
        self._storage = storage
        self._tmp_dir = tmp_dir
+        self._enqueue_enrich = enqueue_enrich

    async def handle_upload(
        self,
@@ -105,7 +110,8 @@ class UploadService:
                    await self._storage.delete(key)
                raise

-            # TODO(1D): enqueue metadata enrichment task
+            if self._enqueue_enrich is not None:
+                await self._enqueue_enrich(track.id)

            return UploadResult(
                track_id=track.id,
@@ -73,9 +73,15 @@ class Settings(BaseSettings):
    # -- external services (all optional; graceful degradation) ----------
    ml_service_url: str | None = None
    acoustid_api_key: SecretStr | None = None
+    acoustid_api_url: str = "https://api.acoustid.org/v2/lookup"
    musicbrainz_user_agent: str = "mcma-backend/0.1.0 ( https://github.com/your/repo )"
    youtube_cookies_path: Path | None = None

+    # -- enrichment -------------------------------------------------------
+    # ``fpcalc`` (Chromaprint) binary; resolved on PATH by default. The Docker
+    # image installs it via libchromaprint-tools.
+    fpcalc_path: str = "fpcalc"
+
    @field_validator("database_url")
    @classmethod
    def _require_async_driver(cls, v: str) -> str:
@@ -3,6 +3,7 @@
 from app.domain.entities.album import Album
 from app.domain.entities.history import PlayHistoryEntry
 from app.domain.entities.like import Like
+from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
 from app.domain.entities.playlist import Playlist
 from app.domain.entities.storage import ObjectStat
 from app.domain.entities.track import Artist, Track
@@ -11,11 +12,14 @@ from app.domain.entities.user import Credentials, SubsonicCredentials, User
 __all__ = [
    "Album",
    "Artist",
+    "AudioTags",
    "Credentials",
+    "Fingerprint",
    "Like",
    "ObjectStat",
    "PlayHistoryEntry",
    "Playlist",
+    "RecordingMatch",
    "SubsonicCredentials",
    "Track",
    "User",
@@ -0,0 +1,53 @@
+"""Value objects for the metadata-enrichment pipeline (plan §6.2).
+
+Pure data carriers between the enrichment service and its adapters (tag reader,
+fingerprinter, AcoustID). No framework imports — these cross the domain boundary.
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, slots=True)
+class AudioTags:
+    """Embedded tags read from the file itself (ID3 / Vorbis / MP4 …).
+
+    Every field is optional — files are tagged inconsistently. The reader fills
+    what it can and leaves the rest ``None`` for downstream identification.
+    """
+
+    title: str | None = None
+    artist: str | None = None
+    album: str | None = None
+    album_artist: str | None = None
+    genre: str | None = None
+    year: int | None = None  # four-digit year; the mutagen reader extracts it from the date tag
+    track_number: int | None = None  # position only; the mutagen reader maps "3/12" to 3
+    duration_seconds: int | None = None  # whole seconds; the mutagen reader rounds the stream length
+    bitrate: int | None = None  # kbps; the mutagen reader divides bits/s by 1000
+
+
+@dataclass(frozen=True, slots=True)
+class Fingerprint:
+    """Chromaprint fingerprint plus the decoded duration (both needed by AcoustID)."""
+
+    fingerprint: str
+    duration_seconds: int  # whole seconds; the fpcalc adapter rounds fpcalc's float duration
+
+
+@dataclass(frozen=True, slots=True)
+class RecordingMatch:
+    """A single AcoustID result, flattened to the fields enrichment cares about.
+
+    ``acoustid`` is the stable AcoustID identifier (a UUID) — used as the
+    dedup key persisted on ``track.acoustid_fingerprint`` (fits the 64-char
+    column; the raw fingerprint does not). ``recording_mbid`` is the MusicBrainz
+    recording id when present.
+    """
+
+    acoustid: str
+    score: float  # score reported for the result; the HTTP parser defaults it to 0.0 when absent
+    recording_mbid: str | None = None
+    title: str | None = None
+    artist: str | None = None  # the HTTP parser uses the first listed artist's name
+    album: str | None = None  # the HTTP parser uses the first release group's title
+    year: int | None = None  # the HTTP parser in this change always leaves this None
@@ -14,11 +14,14 @@ from typing import Protocol

 from app.domain.entities import (
    Album,
+    AudioTags,
    Credentials,
+    Fingerprint,
    Like,
    ObjectStat,
    PlayHistoryEntry,
    Playlist,
+    RecordingMatch,
    SubsonicCredentials,
    User,
 )
@@ -153,9 +156,38 @@ class TrackRepository(Protocol):
        genre: str | None,
        year: int | None,
    ) -> Track: ...
+    async def apply_enrichment(
+        self,
+        track_id: uuid.UUID,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        album_id: uuid.UUID | None,
+        genre: str | None,
+        year: int | None,
+        track_number: int | None,
+        duration_seconds: int | None,
+        bitrate: int | None,
+        acoustid_fingerprint: str | None,
+        musicbrainz_id: str | None,
+        metadata_status: str,
+    ) -> Track:
+        """Persist auto-enrichment results. Nullable fields are filled only when
+        a non-``None`` value is supplied (re-enrich never erases prior data);
+        ``title``/``artist_id``/``metadata_status`` are always written. Callers
+        must not invoke this for ``metadata_status == 'manual'`` tracks."""
+        ...


 class AlbumRepository(Protocol):
+    async def get_or_create(
+        self,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        year: int | None,
+        musicbrainz_id: str | None,
+    ) -> Album: ...
    async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ...
    async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ...
    async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ...
@@ -240,3 +272,28 @@ class IndexableSource(SourceBackend, Protocol):
    """A source that enumerates files already on disk (e.g. the local folder)."""

    def scan(self) -> Iterator[SourceFile]: ...
+
+
+# -- metadata enrichment (plan §6.2) -----------------------------------------
+class AudioTagReader(Protocol):
+    """Reads embedded tags from a local audio file. Returns ``None`` only when
+    the file can't be parsed at all — never raises (graceful degradation)."""
+
+    async def read(self, path: Path) -> AudioTags | None: ...
+
+
+class AudioFingerprinter(Protocol):
+    """Chromaprint (fpcalc) wrapper. ``is_available`` reflects whether the
+    binary is present; ``calculate`` returns ``None`` on any failure."""
+
+    def is_available(self) -> bool: ...
+    async def calculate(self, path: Path) -> Fingerprint | None: ...
+
+
+class AcoustIdClient(Protocol):
+    """AcoustID lookup. ``is_available`` is False without an API key (the whole
+    fingerprint path is then skipped). ``lookup`` returns the best match or
+    ``None`` (no result / service down), never raising."""
+
+    def is_available(self) -> bool: ...
+    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: ...
@@ -27,6 +27,42 @@ class SqlAlchemyAlbumRepository:
    def __init__(self, session: AsyncSession) -> None:
        self._session = session

+    async def get_or_create(
+        self,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        year: int | None,
+        musicbrainz_id: str | None,
+    ) -> Album:
+        """Resolve an album by ``(title, artist_id)``, creating it if absent.
+        Backfills ``year``/``musicbrainz_id`` onto an existing row when it lacks
+        them and enrichment now has values (gap-fill, never overwrite)."""
+        row = (
+            await self._session.execute(
+                select(AlbumModel).where(
+                    AlbumModel.title == title,
+                    AlbumModel.artist_id == artist_id,
+                )
+            )
+        ).scalar_one_or_none()
+        if row is None:
+            row = AlbumModel(
+                title=title,
+                artist_id=artist_id,
+                year=year,
+                musicbrainz_id=musicbrainz_id,
+            )
+            self._session.add(row)
+        else:
+            if row.year is None and year is not None:
+                row.year = year
+            if row.musicbrainz_id is None and musicbrainz_id is not None:
+                row.musicbrainz_id = musicbrainz_id
+        await self._session.flush()
+        await self._session.refresh(row)
+        return _to_entity(row)
+
    async def get_by_id(self, album_id: uuid.UUID) -> Album | None:
        row = await self._session.get(AlbumModel, album_id)
        return _to_entity(row) if row is not None else None
@@ -173,3 +173,47 @@ class SqlAlchemyTrackRepository:
        await self._session.flush()
        await self._session.refresh(row)
        return _to_entity(row)
+
+    async def apply_enrichment(
+        self,
+        track_id: uuid.UUID,
+        *,
+        title: str,
+        artist_id: uuid.UUID,
+        album_id: uuid.UUID | None,
+        genre: str | None,
+        year: int | None,
+        track_number: int | None,
+        duration_seconds: int | None,
+        bitrate: int | None,
+        acoustid_fingerprint: str | None,
+        musicbrainz_id: str | None,
+        metadata_status: str,
+    ) -> Track:
+        row = await self._session.get(TrackModel, track_id)
+        if row is None:
+            raise NotFoundError(f"Track {track_id} not found.")
+        # Identity + status are authoritative for an enrichment run.
+        row.title = title
+        row.artist_id = artist_id
+        row.metadata_status = metadata_status
+        # Nullable extras: fill gaps only — never erase data a prior run found.
+        if album_id is not None:
+            row.album_id = album_id
+        if genre is not None:
+            row.genre = genre
+        if year is not None:
+            row.year = year
+        if track_number is not None:
+            row.track_number = track_number
+        if duration_seconds is not None:
+            row.duration_seconds = duration_seconds
+        if bitrate is not None:
+            row.bitrate = bitrate
+        if acoustid_fingerprint is not None:
+            row.acoustid_fingerprint = acoustid_fingerprint
+        if musicbrainz_id is not None:
+            row.musicbrainz_id = musicbrainz_id
+        await self._session.flush()
+        await self._session.refresh(row)
+        return _to_entity(row)
@@ -0,0 +1 @@
+"""Metadata-enrichment adapters: tag reader, fingerprinter, AcoustID client."""
@@ -0,0 +1,129 @@
+"""AcoustIdHttpClient — identifies a recording from its Chromaprint fingerprint.
+
+One ``/v2/lookup`` call with ``meta=recordings+releasegroups`` returns the
+AcoustID id, the MusicBrainz recording id, and canonical title/artist/album —
+metadata that itself originates from MusicBrainz, so a separate MB call is not
+needed for Phase 1 (plan §6.2 steps 2-3 collapsed into one request).
+
+Graceful degradation: no API key → ``is_available()`` is False and the whole
+fingerprint path is skipped; any network/parse error → ``lookup`` returns
+``None``. A small inter-call delay keeps us within AcoustID's rate limit.
+"""
+
+import asyncio
+import time
+
+import httpx
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import Fingerprint, RecordingMatch
+
+log = get_logger(__name__)
+
+_DEFAULT_URL = "https://api.acoustid.org/v2/lookup"
+_TIMEOUT_SECONDS = 10.0
+_MIN_INTERVAL_SECONDS = 0.34  # AcoustID allows ~3 req/s; stay polite
+
+
+class AcoustIdHttpClient:
+    """Implements :class:`app.domain.ports.AcoustIdClient`."""
+
+    _throttle_lock = asyncio.Lock()
+    _last_call_monotonic = 0.0
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None,
+        user_agent: str,
+        api_url: str = _DEFAULT_URL,
+    ) -> None:
+        self._api_key = api_key
+        self._user_agent = user_agent
+        self._api_url = api_url
+
+    def is_available(self) -> bool:
+        return bool(self._api_key)
+
+    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
+        if not self._api_key:
+            return None
+        try:
+            await self._throttle()
+            async with httpx.AsyncClient(
+                timeout=_TIMEOUT_SECONDS,
+                headers={"User-Agent": self._user_agent},
+            ) as client:
+                resp = await client.get(
+                    self._api_url,
+                    params={
+                        "client": self._api_key,
+                        "duration": str(fingerprint.duration_seconds),
+                        "fingerprint": fingerprint.fingerprint,
+                        "meta": "recordings releasegroups",
+                        "format": "json",
+                    },
+                )
+            resp.raise_for_status()
+            payload = resp.json()
+        except (httpx.HTTPError, ValueError):
+            log.warning("acoustid_lookup_failed")
+            return None
+
+        return _parse_best_match(payload)
+
+    @classmethod
+    async def _throttle(cls) -> None:
+        async with cls._throttle_lock:
+            elapsed = time.monotonic() - cls._last_call_monotonic
+            wait = _MIN_INTERVAL_SECONDS - elapsed
+            if wait > 0:
+                await asyncio.sleep(wait)
+            cls._last_call_monotonic = time.monotonic()
+
+
+def _parse_best_match(payload: object) -> RecordingMatch | None:
+    if not isinstance(payload, dict) or payload.get("status") != "ok":
+        return None
+    results = payload.get("results")
+    if not isinstance(results, list) or not results:
+        return None
+
+    # Results are returned best-score-first; take the top scoring one.
+    best = max(results, key=lambda r: r.get("score", 0.0) if isinstance(r, dict) else 0.0)
+    if not isinstance(best, dict):
+        return None
+
+    acoustid = best.get("id")
+    if not isinstance(acoustid, str):
+        return None
+    score = float(best.get("score", 0.0))
+
+    recording_mbid: str | None = None
+    title: str | None = None
+    artist: str | None = None
+    album: str | None = None
+
+    recordings = best.get("recordings")
+    if isinstance(recordings, list) and recordings and isinstance(recordings[0], dict):
+        rec = recordings[0]
+        recording_mbid = rec.get("id") if isinstance(rec.get("id"), str) else None
+        title = rec.get("title") if isinstance(rec.get("title"), str) else None
+        artists = rec.get("artists")
+        if isinstance(artists, list) and artists and isinstance(artists[0], dict):
+            name = artists[0].get("name")
+            artist = name if isinstance(name, str) else None
+        groups = rec.get("releasegroups")
+        if isinstance(groups, list) and groups and isinstance(groups[0], dict):
+            gtitle = groups[0].get("title")
+            album = gtitle if isinstance(gtitle, str) else None
+
+    return RecordingMatch(
+        acoustid=acoustid,
+        score=score,
+        recording_mbid=recording_mbid,
+        title=title,
+        artist=artist,
+        album=album,
+        year=None,
+    )
@@ -0,0 +1,62 @@
+"""FpcalcFingerprinter — Chromaprint fingerprint via the ``fpcalc`` binary.
+
+``fpcalc -json <file>`` emits ``{"duration": float, "fingerprint": str}``. The
+binary ships in the Docker image (``libchromaprint-tools``). Any failure (binary
+missing, bad file, timeout) degrades to ``None`` — the pipeline then falls back
+to tag-only metadata (plan §6.2: one external dependency must never crash it).
+"""
+
+import asyncio
+import json
+import shutil
+from pathlib import Path
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import Fingerprint
+
+log = get_logger(__name__)
+
+_TIMEOUT_SECONDS = 30
+
+
+class FpcalcFingerprinter:
+    """Implements :class:`app.domain.ports.AudioFingerprinter`."""
+
+    def __init__(self, binary: str = "fpcalc") -> None:
+        self._binary = binary
+
+    def is_available(self) -> bool:
+        return shutil.which(self._binary) is not None
+
+    async def calculate(self, path: Path) -> Fingerprint | None:
+        if not self.is_available():
+            return None
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                self._binary,
+                "-json",
+                str(path),
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            async with asyncio.timeout(_TIMEOUT_SECONDS):
+                stdout, _stderr = await proc.communicate()
+        except (TimeoutError, OSError):
+            log.warning("fpcalc_failed", path=str(path))
+            return None
+
+        if proc.returncode != 0:
+            log.warning("fpcalc_nonzero", path=str(path), returncode=proc.returncode)
+            return None
+
+        try:
+            data = json.loads(stdout)
+            fingerprint = str(data["fingerprint"])
+            duration = round(float(data["duration"]))
+        except (json.JSONDecodeError, KeyError, ValueError):
+            log.warning("fpcalc_bad_output", path=str(path))
+            return None
+
+        if not fingerprint or duration <= 0:
+            return None
+        return Fingerprint(fingerprint=fingerprint, duration_seconds=duration)
@@ -0,0 +1,88 @@
+"""MutagenTagReader — reads embedded tags from a local audio file.
+
+The offline first pass of enrichment (plan §6.2): well-tagged files get correct
+artist/album/title without any network call. mutagen's ``easy=True`` mode
+normalises tag keys across ID3 / Vorbis / MP4, so one code path covers all the
+formats the library accepts. Parsing is blocking, so it runs in a worker thread.
+"""
+
+import re
+from pathlib import Path
+
+import anyio
+from mutagen import File as MutagenFile  # type: ignore[attr-defined]
+
+from app.core.logging import get_logger
+from app.domain.entities.metadata import AudioTags
+
+log = get_logger(__name__)
+
+_YEAR_RE = re.compile(r"(\d{4})")
+
+
+def _first(value: object) -> str | None:
+    """EasyXxx tags expose values as lists; take the first non-empty string."""
+    if isinstance(value, list):
+        value = value[0] if value else None
+    if value is None:
+        return None
+    text = str(value).strip()
+    return text or None
+
+
+def _parse_year(value: object) -> int | None:
+    text = _first(value)
+    if text is None:
+        return None
+    m = _YEAR_RE.search(text)
+    return int(m.group(1)) if m else None
+
+
+def _parse_track_number(value: object) -> int | None:
+    text = _first(value)
+    if text is None:
+        return None
+    # "3" or "3/12" → 3
+    head = text.split("/", 1)[0].strip()
+    return int(head) if head.isdigit() else None
+
+
+class MutagenTagReader:
+    """Implements :class:`app.domain.ports.AudioTagReader`."""
+
+    async def read(self, path: Path) -> AudioTags | None:
+        try:
+            return await anyio.to_thread.run_sync(self._read_sync, path)
+        except Exception:
+            log.warning("tag_read_failed", path=str(path))
+            return None
+
+    def _read_sync(self, path: Path) -> AudioTags | None:
+        audio = MutagenFile(str(path), easy=True)
+        if audio is None:
+            return None  # unrecognised container
+
+        tags = audio.tags or {}
+        info = getattr(audio, "info", None)
+
+        duration = None
+        bitrate = None
+        if info is not None:
+            length = getattr(info, "length", None)
+            if length:
+                duration = round(float(length))
+            raw_bitrate = getattr(info, "bitrate", None)
+            if raw_bitrate:
+                bitrate = int(raw_bitrate) // 1000  # bits/s → kbps for display
+
+        return AudioTags(
+            title=_first(tags.get("title")),
+            artist=_first(tags.get("artist")),
+            album=_first(tags.get("album")),
+            album_artist=_first(tags.get("albumartist")),
+            genre=_first(tags.get("genre")),
+            year=_parse_year(tags.get("date") or tags.get("year")),
+            track_number=_parse_track_number(tags.get("tracknumber")),
+            duration_seconds=duration,
+            bitrate=bitrate,
+        )
@@ -1,7 +1,7 @@
 """arq worker settings — the queue runtime. Task functions register here.

 Run with: ``arq app.workers.arq_worker.WorkerSettings``.
-Tasks (download, enrich, transcode) are appended to ``functions`` in later steps.
+Tasks (download, transcode) are appended to ``functions`` in later steps.
 """

 from typing import Any, ClassVar
@@ -10,6 +10,7 @@ from arq.connections import RedisSettings

 from app.core.config import get_settings
 from app.core.logging import configure_logging, get_logger
+from app.workers.tasks.enrich_task import enrich_track
 from app.workers.tasks.import_task import scan_local_folder

 log = get_logger("worker")
@@ -26,7 +27,7 @@ async def shutdown(_ctx: dict[str, Any]) -> None:


 class WorkerSettings:
-    functions: ClassVar[list[Any]] = [scan_local_folder]
+    functions: ClassVar[list[Any]] = [scan_local_folder, enrich_track]
    on_startup = startup
    on_shutdown = shutdown
    max_jobs = get_settings().max_parallel_downloads
@@ -4,14 +4,18 @@ A short-lived pool per call keeps things simple (enqueues are rare, admin-driven
 actions). Redis being down degrades to a clean 503 rather than a crash
 (graceful degradation)."""

+import uuid
 from typing import Any

 from arq import create_pool
 from arq.connections import RedisSettings

 from app.core.config import get_settings
+from app.core.logging import get_logger
 from app.domain.errors import DependencyUnavailableError

+log = get_logger("worker.queue")
+

 async def enqueue(function: str, **kwargs: Any) -> str:
    """Enqueue ``function`` by name, returning the job id. Raises
@@ -28,3 +32,18 @@ async def enqueue(function: str, **kwargs: Any) -> str:
    if job is None:
        raise DependencyUnavailableError("Could not enqueue job.")
    return str(job.job_id)
+
+
+async def enqueue_enrich(track_id: uuid.UUID) -> None:
+    """Best-effort enqueue of metadata enrichment for a freshly stored track.
+
+    The track is already persisted, so enrichment is a follow-up, not a barrier:
+    if the queue is unreachable we log and move on (graceful degradation). The
+    track stays ``metadata_status=pending`` and can be re-enriched later.
+
+    Deferred a few seconds so the caller's DB transaction is committed before the
+    worker looks the track up (the upload request commits only after it returns)."""
+    try:
+        await enqueue("enrich_track", track_id=str(track_id), _defer_by=5)
+    except DependencyUnavailableError:
+        log.warning("enrich_enqueue_failed", track_id=str(track_id))
@@ -0,0 +1,56 @@
+"""arq task: enrich one track's metadata (plan §6.2, §1D).
+
+Wires the §6.2 pipeline adapters to :class:`MetadataEnrichmentService` and runs
+it in the worker's own transactional session. Enqueued (deferred) after upload
+and after a local-folder import. Idempotent and best-effort — a missing track or
+a ``manual`` one is a clean no-op.
+"""
+
+import uuid
+from typing import Any
+
+from app.application.metadata_service import MetadataEnrichmentService
+from app.core.config import get_settings
+from app.core.logging import get_logger
+from app.infrastructure.db import session_scope
+from app.infrastructure.db.repositories import (
+    SqlAlchemyAlbumRepository,
+    SqlAlchemyArtistRepository,
+    SqlAlchemyTrackRepository,
+)
+from app.infrastructure.metadata.acoustid import AcoustIdHttpClient
+from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
+from app.infrastructure.metadata.tags import MutagenTagReader
+from app.infrastructure.storage.provider import get_file_storage
+
+log = get_logger("worker.enrich")
+
+
+async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any]:
+    settings = get_settings()
+    api_key = (
+        settings.acoustid_api_key.get_secret_value() if settings.acoustid_api_key else None
+    )
+    acoustid = AcoustIdHttpClient(
+        api_key=api_key,
+        user_agent=settings.musicbrainz_user_agent,
+        api_url=settings.acoustid_api_url,
+    )
+
+    async with session_scope() as session:
+        service = MetadataEnrichmentService(
+            tracks=SqlAlchemyTrackRepository(session),
+            artists=SqlAlchemyArtistRepository(session),
+            albums=SqlAlchemyAlbumRepository(session),
+            storage=get_file_storage(),
+            tag_reader=MutagenTagReader(),
+            fingerprinter=FpcalcFingerprinter(settings.fpcalc_path),
+            acoustid=acoustid,
+        )
+        result = await service.enrich(uuid.UUID(track_id))
+
+    return {
+        "track_id": str(result.track_id),
+        "status": result.status,
+        "mbid": result.matched_mbid,
+    }
@@ -18,6 +18,7 @@ from app.infrastructure.db.repositories import (
 )
 from app.infrastructure.sources.registry import build_source_registry
 from app.infrastructure.storage.provider import get_file_storage
+from app.workers.queue import enqueue_enrich

 log = get_logger("worker.import")

@@ -37,6 +38,11 @@ async def scan_local_folder(
        )
        summary = await service.scan_and_import(backend, added_by=actor)

+    # Enqueue enrichment only after the import transaction has committed above,
+    # so the enrich worker is guaranteed to see the new rows.
+    for track_id in summary.imported_ids:
+        await enqueue_enrich(track_id)
+
    return {
        "source": summary.source,
        "seen": summary.seen,
@@ -25,6 +25,8 @@ dependencies = [
    "cryptography>=44.0",
    # outbound http (ML client, MusicBrainz, AcoustID)
    "httpx>=0.28",
+    # embedded audio tag reading (enrichment tag pre-pass)
+    "mutagen>=1.47",
    # S3-compatible object storage
    "aioboto3>=13.0",
    # logging
@@ -0,0 +1,75 @@
+"""Unit tests for the AcoustID response parser — pure, no network."""
+
+from app.infrastructure.metadata.acoustid import _parse_best_match
+
+
+def _payload_with_results(results: list[object]) -> dict[str, object]:
+    return {"status": "ok", "results": results}
+
+
+def test_parses_full_recording() -> None:
+    payload = _payload_with_results(
+        [
+            {
+                "id": "acoustid-1",
+                "score": 0.97,
+                "recordings": [
+                    {
+                        "id": "mb-rec-1",
+                        "title": "One More Time",
+                        "artists": [{"id": "a1", "name": "Daft Punk"}],
+                        "releasegroups": [{"id": "rg1", "title": "Discovery"}],
+                    }
+                ],
+            }
+        ]
+    )
+
+    match = _parse_best_match(payload)
+
+    assert match is not None
+    assert match.acoustid == "acoustid-1"
+    assert match.recording_mbid == "mb-rec-1"
+    assert match.title == "One More Time"
+    assert match.artist == "Daft Punk"
+    assert match.album == "Discovery"
+    assert match.score == 0.97
+
+
+def test_picks_highest_score() -> None:
+    payload = _payload_with_results(
+        [
+            {"id": "low", "score": 0.40, "recordings": [{"id": "r-low", "title": "Low"}]},
+            {"id": "high", "score": 0.92, "recordings": [{"id": "r-high", "title": "High"}]},
+        ]
+    )
+
+    match = _parse_best_match(payload)
+
+    assert match is not None
+    assert match.acoustid == "high"
+    assert match.title == "High"
+
+
+def test_result_without_recordings_still_returns_id() -> None:
+    payload = _payload_with_results([{"id": "acoustid-only", "score": 0.5}])
+
+    match = _parse_best_match(payload)
+
+    assert match is not None
+    assert match.acoustid == "acoustid-only"
+    assert match.recording_mbid is None
+    assert match.title is None
+
+
+def test_error_status_returns_none() -> None:
+    assert _parse_best_match({"status": "error", "error": {"message": "bad"}}) is None
+
+
+def test_empty_results_returns_none() -> None:
+    assert _parse_best_match(_payload_with_results([])) is None
+
+
+def test_non_dict_payload_returns_none() -> None:
+    assert _parse_best_match("nonsense") is None
+    assert _parse_best_match(None) is None
@@ -0,0 +1,283 @@
+"""Unit tests for MetadataEnrichmentService — DB-free, in-memory fakes.
+
+Covers the §6.2 orchestration contract: tag-first merge, AcoustID fallback,
+artist/album resolution, status transitions, and the hard invariants
+(``manual`` untouched, graceful degradation, idempotent gap-fill).
+"""
+
+import datetime as dt
+import uuid
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+import pytest
+from app.application.metadata_service import MetadataEnrichmentService
+from app.domain.entities import Artist, Track
+from app.domain.entities.album import Album
+from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
+
+# Apply the ``asyncio`` marker to every test in this module.
+pytestmark = pytest.mark.asyncio
+
+# NOTE(review): no test in this module references this name — confirm it is needed.
+_UNKNOWN = "Unknown Artist"
+
+
+def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Track:
+    now = dt.datetime.now(dt.UTC)
+    return Track(
+        id=uuid.uuid4(),
+        title=title,
+        artist_id=uuid.uuid4(),  # the "Unknown Artist" id
+        album_id=None,
+        storage_uri="tracks/aa/song.mp3",
+        file_format="mp3",
+        file_size=123,
+        source="upload",
+        source_id="deadbeef",
+        duration_seconds=None,
+        genre=None,
+        year=None,
+        metadata_status=metadata_status,
+        created_at=now,
+        updated_at=now,
+    )
+
+
+class FakeTrackRepo:
+    def __init__(self, track: Track | None) -> None:
+        self._track = track
+        self.applied: dict[str, object] | None = None
+
+    async def get_by_id(self, track_id: uuid.UUID) -> Track | None:
+        return self._track
+
+    async def apply_enrichment(self, track_id: uuid.UUID, **kw: object) -> Track:
+        self.applied = kw
+        return self._track  # type: ignore[return-value]
+
+
+class FakeArtistRepo:
+    def __init__(self) -> None:
+        self.created: list[str] = []
+
+    async def get_or_create(self, name: str) -> Artist:
+        self.created.append(name)
+        now = dt.datetime.now(dt.UTC)
+        return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now)
+
+
+class FakeAlbumRepo:
+    def __init__(self) -> None:
+        self.created: list[tuple[str, uuid.UUID]] = []
+
+    async def get_or_create(
+        self, *, title: str, artist_id: uuid.UUID, year: int | None, musicbrainz_id: str | None
+    ) -> Album:
+        self.created.append((title, artist_id))
+        now = dt.datetime.now(dt.UTC)
+        return Album(
+            id=uuid.uuid4(),
+            title=title,
+            artist_id=artist_id,
+            year=year,
+            cover_path=None,
+            musicbrainz_id=musicbrainz_id,
+            created_at=now,
+            updated_at=now,
+        )
+
+
+class FakeStorage:
+    @asynccontextmanager
+    async def as_local_path(self, key: str) -> AsyncIterator[Path]:
+        yield Path("/tmp") / key
+
+
+class FakeTagReader:
+    def __init__(self, tags: AudioTags | None) -> None:
+        self._tags = tags
+
+    async def read(self, path: Path) -> AudioTags | None:
+        return self._tags
+
+
+class FakeFingerprinter:
+    def __init__(self, fp: Fingerprint | None, *, available: bool = True) -> None:
+        self._fp = fp
+        self._available = available
+
+    def is_available(self) -> bool:
+        return self._available
+
+    async def calculate(self, path: Path) -> Fingerprint | None:
+        return self._fp
+
+
+class FakeAcoustId:
+    def __init__(self, match: RecordingMatch | None, *, available: bool = True) -> None:
+        self._match = match
+        self._available = available
+        self.calls = 0
+
+    def is_available(self) -> bool:
+        return self._available
+
+    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
+        self.calls += 1
+        return self._match
+
+
+def _service(
+    *,
+    track: Track | None,
+    tags: AudioTags | None = None,
+    fp: Fingerprint | None = None,
+    match: RecordingMatch | None = None,
+    fp_available: bool = True,
+    acoustid_available: bool = True,
+) -> tuple[MetadataEnrichmentService, FakeTrackRepo, FakeArtistRepo, FakeAlbumRepo, FakeAcoustId]:
+    tracks = FakeTrackRepo(track)
+    artists = FakeArtistRepo()
+    albums = FakeAlbumRepo()
+    acoustid = FakeAcoustId(match, available=acoustid_available)
+    service = MetadataEnrichmentService(
+        tracks=tracks,  # type: ignore[arg-type]
+        artists=artists,  # type: ignore[arg-type]
+        albums=albums,  # type: ignore[arg-type]
+        storage=FakeStorage(),  # type: ignore[arg-type]
+        tag_reader=FakeTagReader(tags),  # type: ignore[arg-type]
+        fingerprinter=FakeFingerprinter(fp, available=fp_available),  # type: ignore[arg-type]
+        acoustid=acoustid,  # type: ignore[arg-type]
+    )
+    return service, tracks, artists, albums, acoustid
+
+
+async def test_tags_only_enriches_and_relinks_artist_and_album() -> None:
+    track = _track()
+    tags = AudioTags(
+        title="Real Title",
+        artist="Pink Floyd",
+        album="The Wall",
+        genre="Rock",
+        year=1979,
+        track_number=1,
+        duration_seconds=222,
+    )
+    service, tracks, artists, albums, acoustid = _service(track=track, tags=tags)
+
+    result = await service.enrich(track.id)
+
+    assert result.status == "enriched"
+    assert acoustid.calls == 0  # no fingerprint → no lookup needed
+    assert "Pink Floyd" in artists.created
+    assert albums.created and albums.created[0][0] == "The Wall"
+    applied = tracks.applied
+    assert applied is not None
+    assert applied["title"] == "Real Title"
+    assert applied["genre"] == "Rock"
+    assert applied["year"] == 1979
+    assert applied["track_number"] == 1
+    assert applied["duration_seconds"] == 222
+    assert applied["metadata_status"] == "enriched"
+
+
+async def test_manual_track_is_never_touched() -> None:
+    track = _track(metadata_status="manual")
+    service, tracks, _, _, _ = _service(track=track, tags=AudioTags(artist="X"))
+
+    result = await service.enrich(track.id)
+
+    assert result.status == "skipped"
+    assert tracks.applied is None  # nothing written
+
+
+async def test_missing_track_is_a_clean_noop() -> None:
+    service, tracks, _, _, _ = _service(track=None)
+
+    result = await service.enrich(uuid.uuid4())
+
+    assert result.status == "skipped"
+    assert tracks.applied is None
+
+
+async def test_nothing_found_marks_failed() -> None:
+    track = _track()
+    # No tags, no fingerprint → no identity at all.
+    service, tracks, artists, albums, _acoustid = _service(track=track, tags=None, fp=None)
+
+    result = await service.enrich(track.id)
+
+    assert result.status == "failed"
+    assert artists.created == []  # artist stays the original unknown
+    assert albums.created == []
+    applied = tracks.applied
+    assert applied is not None
+    assert applied["artist_id"] == track.artist_id  # fallback kept
+    assert applied["metadata_status"] == "failed"
+
+
+async def test_acoustid_path_fills_when_tags_absent() -> None:
+    track = _track()
+    fp = Fingerprint(fingerprint="AQAAxyz", duration_seconds=200)
+    match = RecordingMatch(
+        acoustid="acoustid-uuid",
+        score=0.95,
+        recording_mbid="mb-recording-id",
+        title="Identified Title",
+        artist="Daft Punk",
+        album="Discovery",
+    )
+    service, tracks, artists, _albums, acoustid = _service(
+        track=track, tags=None, fp=fp, match=match
+    )
+
+    result = await service.enrich(track.id)
+
+    assert result.status == "enriched"
+    assert result.matched_mbid == "mb-recording-id"
+    assert acoustid.calls == 1
+    applied = tracks.applied
+    assert applied is not None
+    assert applied["title"] == "Identified Title"
+    assert applied["musicbrainz_id"] == "mb-recording-id"
+    assert applied["acoustid_fingerprint"] == "acoustid-uuid"
+    assert "Daft Punk" in artists.created
+
+
+async def test_tags_win_over_acoustid_for_overlapping_fields() -> None:
+    track = _track()
+    fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
+    tags = AudioTags(title="Tagged Title", artist="Tagged Artist")
+    match = RecordingMatch(
+        acoustid="aid",
+        score=0.9,
+        recording_mbid="mbid",
+        title="AcoustID Title",
+        artist="AcoustID Artist",
+    )
+    service, tracks, artists, _albums, _acoustid = _service(
+        track=track, tags=tags, fp=fp, match=match
+    )
+
+    await service.enrich(track.id)
+
+    applied = tracks.applied
+    assert applied is not None
+    assert applied["title"] == "Tagged Title"  # tag preferred
+    assert "Tagged Artist" in artists.created
+    # but the MBID from AcoustID is still captured
+    assert applied["musicbrainz_id"] == "mbid"
+
+
+async def test_fingerprint_skipped_when_acoustid_unavailable() -> None:
+    track = _track()
+    fp = Fingerprint(fingerprint="AQAA", duration_seconds=200)
+    service, _tracks, _artists, _albums, acoustid = _service(
+        track=track, tags=AudioTags(artist="Tagged"), fp=fp, acoustid_available=False
+    )
+
+    result = await service.enrich(track.id)
+
+    # tags still enrich, but no AcoustID call is attempted
+    assert acoustid.calls == 0
+    assert result.status == "enriched"
@@ -0,0 +1,27 @@
+"""Unit tests for the mutagen tag-parsing helpers — pure, no files."""
+
+from app.infrastructure.metadata.tags import _first, _parse_track_number, _parse_year
+
+
+def test_first_takes_head_of_list() -> None:
+    assert _first(["Pink Floyd", "other"]) == "Pink Floyd"
+    assert _first("Solo") == "Solo"
+    assert _first([]) is None
+    assert _first(None) is None
+    assert _first(["  "]) is None  # whitespace-only → None
+
+
+def test_parse_year_extracts_four_digits() -> None:
+    assert _parse_year(["1979"]) == 1979
+    assert _parse_year(["1979-01-02"]) == 1979
+    assert _parse_year("2021-12") == 2021
+    assert _parse_year(["no year"]) is None
+    assert _parse_year(None) is None
+
+
+def test_parse_track_number_handles_slash_form() -> None:
+    assert _parse_track_number(["3/12"]) == 3
+    assert _parse_track_number(["7"]) == 7
+    assert _parse_track_number("1/10") == 1
+    assert _parse_track_number(["A1"]) is None
+    assert _parse_track_number(None) is None
				`@@ -0,0 +1 @@`
				`"""Metadata-enrichment adapters: tag reader, fingerprinter, AcoustID client."""`