feat(sources): YouTube Music search + download pipeline (§1C/§1E)

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
@@ -15,6 +15,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.application.auth_service import AuthService
+from app.application.download_service import DownloadService
 from app.application.metadata_service import MetadataEnrichmentService
 from app.application.streaming_service import StreamingService
 from app.application.subsonic_auth_service import SubsonicAuthService
@@ -29,6 +30,7 @@ from app.infrastructure.db import get_sessionmaker
 from app.infrastructure.db.repositories import (
    SqlAlchemyAlbumRepository,
    SqlAlchemyArtistRepository,
+    SqlAlchemyDownloadJobRepository,
    SqlAlchemyHistoryRepository,
    SqlAlchemyLikeRepository,
    SqlAlchemyPlaylistRepository,
@@ -41,7 +43,7 @@ from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
 from app.infrastructure.metadata.tags import MutagenTagReader
 from app.infrastructure.sources.registry import SourceRegistry, build_source_registry
 from app.infrastructure.storage.provider import get_file_storage
-from app.workers.queue import enqueue_enrich
+from app.workers.queue import enqueue_download, enqueue_enrich


 async def get_session() -> AsyncIterator[AsyncSession]:
@@ -136,9 +138,7 @@ def get_streaming_service(session: SessionDep, storage: FileStorageDep) -> Strea
    )


-def get_metadata_service(
-    session: SessionDep, storage: FileStorageDep
-) -> MetadataEnrichmentService:
+def get_metadata_service(session: SessionDep, storage: FileStorageDep) -> MetadataEnrichmentService:
    """Wires the §6.2 fingerprint/AcoustID adapters for read-only, inline use
    (the metadata editor's "find matches" — §A7). The full pipeline (incl.
    cover art) stays in the worker (`tasks/enrich_task.py`)."""
@@ -161,9 +161,21 @@ def get_metadata_service(
    )


+def get_download_service(session: SessionDep, storage: FileStorageDep) -> DownloadService:
+    return DownloadService(
+        jobs=SqlAlchemyDownloadJobRepository(session),
+        tracks=SqlAlchemyTrackRepository(session),
+        artists=SqlAlchemyArtistRepository(session),
+        storage=storage,
+        enqueue_download=enqueue_download,
+        enqueue_enrich=enqueue_enrich,
+    )
+
+
 UploadServiceDep = Annotated[UploadService, Depends(get_upload_service)]
 StreamingServiceDep = Annotated[StreamingService, Depends(get_streaming_service)]
 MetadataServiceDep = Annotated[MetadataEnrichmentService, Depends(get_metadata_service)]
+DownloadServiceDep = Annotated[DownloadService, Depends(get_download_service)]


 # -- library repository deps ---------------------------------------------------
@@ -0,0 +1,59 @@
+"""Schemas for the download job endpoints (§A5 download manager)."""
+
+import datetime as dt
+import uuid
+
+from pydantic import BaseModel, Field
+
+from app.domain.entities.download import DownloadJob
+
+
+class DownloadCreate(BaseModel):
+    """Request to download an item discovered on a fetch source."""
+
+    source: str
+    source_id: str = Field(min_length=1)
+    # Optional free-text the result came from — stored for display only.
+    query: str | None = None
+
+
+class DownloadJobOut(BaseModel):
+    id: uuid.UUID
+    source: str
+    source_id: str | None
+    query: str | None
+    status: str
+    progress: float
+    error_message: str | None
+    retry_count: int
+    track_id: uuid.UUID | None
+    created_at: dt.datetime
+    updated_at: dt.datetime
+
+    @classmethod
+    def from_entity(cls, job: DownloadJob) -> DownloadJobOut:
+        return cls(
+            id=job.id,
+            source=job.source,
+            source_id=job.source_id,
+            query=job.query,
+            status=job.status,
+            progress=job.progress,
+            error_message=job.error_message,
+            retry_count=job.retry_count,
+            track_id=job.track_id,
+            created_at=job.created_at,
+            updated_at=job.updated_at,
+        )
+
+
+class DownloadCreateResponse(BaseModel):
+    """Result of requesting a download.
+
+    ``already_in_library`` → the item was already imported (``track_id`` set, no
+    job). Otherwise ``job`` describes the queued (or already in-flight) download.
+    """
+
+    already_in_library: bool
+    track_id: uuid.UUID | None
+    job: DownloadJobOut | None
@@ -0,0 +1,35 @@
+"""Schemas for searching external (fetch) sources — the §A4 discover screen."""
+
+from pydantic import BaseModel
+
+from app.domain.sources import SearchResult
+
+
+class ExternalSearchResultOut(BaseModel):
+    source: str
+    source_id: str
+    title: str
+    artist: str | None
+    album: str | None
+    duration_seconds: int | None
+    thumbnail_url: str | None
+
+    @classmethod
+    def from_entity(cls, r: SearchResult) -> ExternalSearchResultOut:
+        return cls(
+            source=r.source,
+            source_id=r.source_id,
+            title=r.title,
+            artist=r.artist,
+            album=r.album,
+            duration_seconds=r.duration_seconds,
+            thumbnail_url=r.thumbnail_url,
+        )
+
+
+class ExternalSearchResponse(BaseModel):
+    """Flat list of hits across one or more searchable sources, plus the names of
+    sources that were unavailable (so the UI can show a soft warning)."""
+
+    results: list[ExternalSearchResultOut]
+    searched_sources: list[str]
@@ -1,36 +1,78 @@
-"""Download job endpoints. Heavy work is dispatched to arq workers."""
+"""Download job endpoints (§A5). Heavy work is dispatched to arq workers — these
+handlers only create/inspect/cancel/retry job records."""

 import uuid
-from typing import Any

-from fastapi import APIRouter
+from fastapi import APIRouter, Query, Response
+
+from app.api.deps import CurrentUser, DownloadServiceDep
+from app.api.schemas.download import DownloadCreate, DownloadCreateResponse, DownloadJobOut
+from app.api.schemas.pagination import PagedResponse

 router = APIRouter(prefix="/downloads", tags=["downloads"])


@router.get("")
-async def list_downloads() -> Any: ...
+async def list_downloads(
+    service: DownloadServiceDep,
+    user: CurrentUser,
+    status: str | None = Query(default=None),
+    mine: bool = Query(default=False),
+    limit: int = Query(50, ge=1, le=200),
+    offset: int = Query(0, ge=0),
+) -> PagedResponse[DownloadJobOut]:
+    jobs, total = await service.list(
+        requested_by=user.id if mine else None,
+        status=status,
+        limit=limit,
+        offset=offset,
+    )
+    return PagedResponse(
+        items=[DownloadJobOut.from_entity(j) for j in jobs],
+        total=total,
+        limit=limit,
+        offset=offset,
+    )


-@router.post("")
-async def create_download() -> Any: ...
+@router.post("", status_code=202)
+async def create_download(
+    body: DownloadCreate,
+    service: DownloadServiceDep,
+    user: CurrentUser,
+) -> DownloadCreateResponse:
+    result = await service.request(
+        source=body.source,
+        source_id=body.source_id,
+        query=body.query,
+        requested_by=user.id,
+    )
+    return DownloadCreateResponse(
+        already_in_library=result.already_in_library,
+        track_id=result.track_id,
+        job=DownloadJobOut.from_entity(result.job) if result.job is not None else None,
+    )


@router.get("/{job_id}")
-async def get_download(job_id: uuid.UUID) -> Any: ...
+async def get_download(
+    job_id: uuid.UUID, service: DownloadServiceDep, _: CurrentUser
+) -> DownloadJobOut:
+    job = await service.get(job_id)
+    return DownloadJobOut.from_entity(job)


-@router.delete("/{job_id}")
-async def cancel_download(job_id: uuid.UUID) -> Any: ...
+@router.delete("/{job_id}", status_code=204)
+async def cancel_download(
+    job_id: uuid.UUID, service: DownloadServiceDep, _: CurrentUser
+) -> Response:
+    await service.cancel(job_id)
+    return Response(status_code=204)


@router.post("/{job_id}/retry")
-async def retry_download(job_id: uuid.UUID) -> Any: ...
-
-
-@router.post("/pause")
-async def pause_downloads() -> Any: ...
-
-
-@router.post("/resume")
-async def resume_downloads() -> Any: ...
+async def retry_download(
+    job_id: uuid.UUID, service: DownloadServiceDep, _: CurrentUser
+) -> DownloadJobOut:
+    job = await service.retry(job_id)
+    return DownloadJobOut.from_entity(job)
@@ -1,12 +1,11 @@
 """Search endpoints: global and library-scoped."""

-from typing import Any
-
 from fastapi import APIRouter, Query

-from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, TrackRepoDep
+from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, SourceRegistryDep, TrackRepoDep
 from app.api.schemas.album import AlbumOut
 from app.api.schemas.artist import ArtistOut
+from app.api.schemas.external_search import ExternalSearchResponse, ExternalSearchResultOut
 from app.api.schemas.search import LibrarySearchResponse
 from app.api.schemas.track import TrackOut
 from app.api.v1.albums import _build_album_out
@@ -16,7 +15,26 @@ router = APIRouter(prefix="/search", tags=["search"])


@router.get("")
-async def search(_: CurrentUser) -> Any: ...
+async def search(
+    _: CurrentUser,
+    registry: SourceRegistryDep,
+    q: str = Query(min_length=1),
+    limit: int = Query(20, ge=1, le=50),
+) -> ExternalSearchResponse:
+    """Search every available fetch source and merge the hits (§A4 discover).
+
+    A source that is down contributes nothing rather than failing the whole
+    request (graceful degradation); only available sources are reported as
+    searched."""
+    results: list[ExternalSearchResultOut] = []
+    searched: list[str] = []
+    for backend in registry.searchables():
+        if not backend.is_available():
+            continue
+        searched.append(backend.name)
+        hits = await backend.search(q, limit=limit)
+        results.extend(ExternalSearchResultOut.from_entity(h) for h in hits)
+    return ExternalSearchResponse(results=results, searched_sources=searched)


@router.get("/library")
@@ -1,14 +1,13 @@
-"""External source endpoints: enumerate sources and trigger imports.
+"""External source endpoints: enumerate sources, search, and trigger imports.

-Listing/health are read-only (any authenticated user). Scanning a source is an
-admin action and runs in a worker — the endpoint only enqueues it.
+Listing/health/search are read-only (any authenticated user). Scanning a source
+is an admin action and runs in a worker — the endpoint only enqueues it.
 """

-from typing import Any
-
-from fastapi import APIRouter
+from fastapi import APIRouter, Query

 from app.api.deps import CurrentUser, SourceRegistryDep, SuperUser
+from app.api.schemas.external_search import ExternalSearchResponse, ExternalSearchResultOut
 from app.api.schemas.source import ScanResponse, SourceHealthOut, SourceInfoOut
 from app.domain.errors import DependencyUnavailableError
 from app.workers.queue import enqueue
@@ -39,6 +38,18 @@ async def source_health(


@router.get("/{source}/search")
-async def search_source(source: str, _: CurrentUser) -> Any:
-    # Search is for fetch-style sources (youtube, …) — not yet implemented.
-    ...
+async def search_source(
+    source: str,
+    _: CurrentUser,
+    registry: SourceRegistryDep,
+    q: str = Query(min_length=1),
+    limit: int = Query(20, ge=1, le=50),
+) -> ExternalSearchResponse:
+    backend = registry.searchable(source)  # 404 if unknown, 422 if not searchable
+    if not backend.is_available():
+        raise DependencyUnavailableError(f"Source {source!r} is not available.")
+    results = await backend.search(q, limit=limit)
+    return ExternalSearchResponse(
+        results=[ExternalSearchResultOut.from_entity(r) for r in results],
+        searched_sources=[source],
+    )
@@ -63,8 +63,7 @@ async def get_storage_stats(
        by_metadata_status=stats.by_metadata_status,
        by_source=stats.by_source,
        top_genres=[
-            GenreCountOut(genre=genre, track_count=count)
-            for genre, count in genres[:_TOP_GENRES]
+            GenreCountOut(genre=genre, track_count=count) for genre, count in genres[:_TOP_GENRES]
        ],
        disk=DiskUsageOut(total=disk.total, used=disk.used, free=disk.free) if disk else None,
    )
@@ -87,9 +87,7 @@ async def list_tracks(
        limit=limit,
        offset=offset,
    )
-    total = await track_repo.count(
-        artist_id=artist_id, album_id=album_id, q=q, source=source
-    )
+    total = await track_repo.count(artist_id=artist_id, album_id=album_id, q=q, source=source)

    artist_ids = list({t.artist_id for t in tracks})
    album_ids = list({t.album_id for t in tracks if t.album_id is not None})
@@ -0,0 +1,183 @@
+"""DownloadService — request external downloads and import their results.
+
+Two roles (plan §6.1):
+
+* **Request side** (HTTP): validate + dedup a download request, create a
+  ``queued`` job, and enqueue the worker. Dedup is on ``(source, source_id)``
+  against both the library (already imported) and in-flight jobs (a double-click
+  must not queue twice) — idempotency per CLAUDE.md.
+* **Worker side**: ``store_result`` turns a backend's :class:`DownloadResult`
+  into a managed file + minimal ``pending`` track (sibling of
+  :class:`~app.application.import_service.LibraryImportService`); enrichment
+  (§6.2) fills the rest.
+
+The fingerprint-level dedup (a different id that turns out to be the same audio)
+happens later in enrichment, where the fingerprint is computed.
+"""
+
+import contextlib
+import uuid
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+
+import anyio
+
+from app.core.logging import get_logger
+from app.domain.entities.download import DownloadJob
+from app.domain.errors import NotFoundError, ValidationError
+from app.domain.ports import (
+    ArtistRepository,
+    DownloadJobRepository,
+    FileStorage,
+    TrackRepository,
+)
+from app.domain.sources import DownloadResult
+
+log = get_logger(__name__)
+
+_UNKNOWN_ARTIST = "Unknown Artist"
+
+# (job_id) -> None — enqueue the download worker, deferred so the job row is
+# committed before the worker reads it (same pattern as enrich).
+DownloadEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
+EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
+
+
+@dataclass(frozen=True)
+class DownloadRequest:
+    """Outcome of asking for a download.
+
+    Exactly one of the three states holds: the item is already in the library
+    (``track_id`` set, ``already_in_library``), a job already covers it / was
+    just created (``job`` set), so the UI can route to the download manager.
+    """
+
+    job: DownloadJob | None
+    track_id: uuid.UUID | None
+    already_in_library: bool
+
+
+class DownloadService:
+    def __init__(
+        self,
+        *,
+        jobs: DownloadJobRepository,
+        tracks: TrackRepository,
+        artists: ArtistRepository,
+        storage: FileStorage,
+        enqueue_download: DownloadEnqueuer | None = None,
+        enqueue_enrich: EnrichEnqueuer | None = None,
+    ) -> None:
+        self._jobs = jobs
+        self._tracks = tracks
+        self._artists = artists
+        self._storage = storage
+        self._enqueue_download = enqueue_download
+        self._enqueue_enrich = enqueue_enrich
+
+    # -- request side ---------------------------------------------------------
+    async def request(
+        self,
+        *,
+        source: str,
+        source_id: str,
+        query: str | None,
+        requested_by: uuid.UUID | None,
+    ) -> DownloadRequest:
+        source_id = source_id.strip()
+        if not source_id:
+            raise ValidationError("A source_id is required to download.")
+
+        existing = await self._tracks.get_by_source(source, source_id)
+        if existing is not None:
+            return DownloadRequest(job=None, track_id=existing.id, already_in_library=True)
+
+        active = await self._jobs.get_active_for_source(source, source_id)
+        if active is not None:
+            return DownloadRequest(job=active, track_id=None, already_in_library=False)
+
+        job = await self._jobs.add(
+            source=source,
+            source_id=source_id,
+            query=query,
+            requested_by=requested_by,
+        )
+        if self._enqueue_download is not None:
+            await self._enqueue_download(job.id)
+        return DownloadRequest(job=job, track_id=None, already_in_library=False)
+
+    async def list(
+        self,
+        *,
+        requested_by: uuid.UUID | None,
+        status: str | None,
+        limit: int,
+        offset: int,
+    ) -> tuple[list[DownloadJob], int]:
+        jobs = await self._jobs.list(
+            requested_by=requested_by, status=status, limit=limit, offset=offset
+        )
+        total = await self._jobs.count(requested_by=requested_by, status=status)
+        return jobs, total
+
+    async def get(self, job_id: uuid.UUID) -> DownloadJob:
+        job = await self._jobs.get_by_id(job_id)
+        if job is None:
+            raise NotFoundError(f"Download job {job_id} not found.")
+        return job
+
+    async def cancel(self, job_id: uuid.UUID) -> None:
+        """Remove the job record. True mid-flight cancellation of an in-progress
+        yt-dlp download is out of scope (MVP); the worker tolerates a vanished
+        job row (its status writes become no-ops)."""
+        job = await self._jobs.get_by_id(job_id)
+        if job is None:
+            raise NotFoundError(f"Download job {job_id} not found.")
+        await self._jobs.delete(job_id)
+
+    async def retry(self, job_id: uuid.UUID) -> DownloadJob:
+        job = await self.get(job_id)
+        await self._jobs.set_status(job_id, status="queued", error_message=None)
+        if self._enqueue_download is not None:
+            await self._enqueue_download(job_id)
+        refreshed = await self._jobs.get_by_id(job_id)
+        return refreshed if refreshed is not None else job
+
+    # -- worker side ----------------------------------------------------------
+    async def store_result(
+        self,
+        *,
+        source: str,
+        result: DownloadResult,
+        requested_by: uuid.UUID | None,
+    ) -> uuid.UUID:
+        """Store a freshly downloaded file and create a minimal ``pending`` track.
+
+        Returns the new track id (the caller enqueues enrichment after commit).
+        The temp file produced by the backend is always removed."""
+        track_id = uuid.uuid4()
+        key = f"tracks/{str(track_id)[:2]}/{track_id}.{result.file_format}"
+        try:
+            await self._storage.save_file(key, result.path)
+            try:
+                artist = await self._artists.get_or_create(_UNKNOWN_ARTIST)
+                await self._tracks.add(
+                    id=track_id,
+                    title=result.suggested_title,
+                    artist_id=artist.id,
+                    storage_uri=key,
+                    file_format=result.file_format,
+                    file_size=result.file_size,
+                    source=source,
+                    source_id=result.source_id,
+                    metadata_status="pending",
+                    added_by=requested_by,
+                )
+            except Exception:
+                with contextlib.suppress(Exception):
+                    await self._storage.delete(key)
+                raise
+        finally:
+            with contextlib.suppress(Exception):
+                await anyio.Path(result.path).unlink(missing_ok=True)
+        return track_id
@@ -71,6 +71,9 @@ class Settings(BaseSettings):
    media_path: Path = Path("/data/media")
    transcode_cache_path: Path = Path("/data/transcode-cache")
    max_parallel_downloads: int = 2
+    # How many times the download worker retries a failed fetch (yt-dlp fails
+    # often) before marking the job ``failed`` — exponential backoff between tries.
+    download_max_retries: int = 3
    storage_backend: Literal["local", "s3"] = "local"
    upload_tmp_dir: Path | None = None

@@ -100,6 +103,11 @@ class Settings(BaseSettings):
    # deployments should set their own contact email; see
    # ``musicbrainz_user_agent`` below for how it's used.
    musicbrainz_owner_email: str | None = None
+    # ``youtube`` fetch source (search + download via ytmusicapi/yt-dlp). Enabled
+    # by default; the source still reports unavailable if the libs aren't present.
+    youtube_enabled: bool = True
+    # Optional cookies file (Netscape format) for yt-dlp — lets it fetch
+    # age-restricted / region-locked items via an authenticated session.
    youtube_cookies_path: Path | None = None

    # -- enrichment -------------------------------------------------------
@@ -2,6 +2,7 @@

 from app.domain.entities.album import Album
 from app.domain.entities.cover import CoverArt
+from app.domain.entities.download import DownloadJob
 from app.domain.entities.history import PlayHistoryEntry
 from app.domain.entities.like import Like
 from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
@@ -22,6 +23,7 @@ __all__ = [
    "CoverArt",
    "Credentials",
    "DiskUsage",
+    "DownloadJob",
    "Fingerprint",
    "FormatBreakdown",
    "LibraryStats",
@@ -0,0 +1,26 @@
+"""Download job domain entity (plan §6.1).
+
+A queued fetch from an external source, tracked through its lifecycle so the UI
+download manager (screen §A5) can show progress, errors, and retries. The
+``status`` strings mirror :class:`~app.infrastructure.db.models.enums.DownloadStatus`.
+"""
+
+import datetime as dt
+import uuid
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, slots=True)
+class DownloadJob:
+    id: uuid.UUID
+    source: str
+    source_id: str | None
+    query: str | None
+    requested_by: uuid.UUID | None
+    status: str
+    progress: float
+    error_message: str | None
+    retry_count: int
+    track_id: uuid.UUID | None
+    created_at: dt.datetime
+    updated_at: dt.datetime
@@ -7,7 +7,7 @@ are bound to these ports at the composition root (``app.api.deps``).

 import datetime as dt
 import uuid
-from collections.abc import AsyncIterator, Iterator
+from collections.abc import AsyncIterator, Awaitable, Callable, Iterator
 from contextlib import AbstractAsyncContextManager
 from pathlib import Path
 from typing import Protocol
@@ -18,6 +18,7 @@ from app.domain.entities import (
    CoverArt,
    Credentials,
    DiskUsage,
+    DownloadJob,
    Fingerprint,
    LibraryStats,
    Like,
@@ -29,9 +30,14 @@ from app.domain.entities import (
    User,
 )
 from app.domain.entities.track import Artist, Track
-from app.domain.sources import SourceFile, SourceInfo
+from app.domain.sources import DownloadResult, RawMetadata, SearchResult, SourceFile, SourceInfo
 from app.domain.tokens import IssuedToken, TokenClaims, TokenType

+# A fetch source reports download progress as a fraction in [0.0, 1.0]. It's a
+# plain callback (not a port) because it's an inversion of control supplied per
+# call by the worker, which persists it to the download job.
+ProgressCallback = Callable[[float], Awaitable[None]]
+

 class UserRepository(Protocol):
    async def get_by_id(self, user_id: uuid.UUID) -> User | None: ...
@@ -275,6 +281,54 @@ class HistoryRepository(Protocol):
    async def count(self, *, user_id: uuid.UUID) -> int: ...


+class DownloadJobRepository(Protocol):
+    """Persistence for download jobs (plan §6.1). Drives the §A5 download manager
+    and the worker's retry/backoff loop."""
+
+    async def add(
+        self,
+        *,
+        source: str,
+        source_id: str | None,
+        query: str | None,
+        requested_by: uuid.UUID | None,
+    ) -> DownloadJob: ...
+    async def get_by_id(self, job_id: uuid.UUID) -> DownloadJob | None: ...
+    async def get_active_for_source(self, source: str, source_id: str) -> DownloadJob | None:
+        """An unfinished (queued/downloading/enriching) job for the same item, if
+        any — used to dedup before enqueuing so a double-click can't queue twice."""
+        ...
+
+    async def list(
+        self,
+        *,
+        requested_by: uuid.UUID | None,
+        status: str | None,
+        limit: int,
+        offset: int,
+    ) -> list[DownloadJob]: ...
+    async def count(self, *, requested_by: uuid.UUID | None, status: str | None) -> int: ...
+    async def set_status(
+        self,
+        job_id: uuid.UUID,
+        *,
+        status: str,
+        error_message: str | None = None,
+        track_id: uuid.UUID | None = None,
+    ) -> None: ...
+    async def set_progress(self, job_id: uuid.UUID, progress: float) -> None: ...
+    async def increment_retry(self, job_id: uuid.UUID) -> int:
+        """Bump ``retry_count`` and return the new value."""
+        ...
+
+    async def delete(self, job_id: uuid.UUID) -> None: ...
+    async def failure_rate(self, source: str, *, since: dt.datetime) -> float:
+        """Fraction of jobs for ``source`` created since ``since`` that ended
+        ``failed`` (0.0 when there are none) — drives the §A5 "source unhealthy"
+        banner."""
+        ...
+
+
 class SourceBackend(Protocol):
    """A registered source of tracks (mounted folder, YouTube, …).

@@ -293,6 +347,29 @@ class IndexableSource(SourceBackend, Protocol):
    def scan(self) -> Iterator[SourceFile]: ...


+class SearchableSource(SourceBackend, Protocol):
+    """A source that can be searched by free text (e.g. YouTube Music).
+
+    Returns ``[]`` (never raises) on no results / the service being down — the
+    discover screen degrades to "nothing found" rather than erroring."""
+
+    async def search(self, query: str, *, limit: int) -> list[SearchResult]: ...
+
+
+class FetchableSource(SourceBackend, Protocol):
+    """A source that can download a previously-discovered item to local disk.
+
+    ``fetch`` resolves a ``source_id`` (from a :class:`SearchResult`) into a file
+    and reports progress through ``on_progress``. It runs only in a worker (heavy
+    I/O) and raises on failure so the download task can retry with backoff."""
+
+    async def fetch(
+        self, source_id: str, *, on_progress: ProgressCallback | None = None
+    ) -> DownloadResult: ...
+
+    async def get_metadata(self, source_id: str) -> RawMetadata | None: ...
+
+
 # -- metadata enrichment (plan §6.2) -----------------------------------------
 class AudioTagReader(Protocol):
    """Reads embedded tags from a local audio file. Returns ``None`` only when
@@ -10,8 +10,14 @@ here — a source yields a file plus a minimal title; enrichment (plan §6.2) fi
 the rest later, so this stays a thin discovery layer (CLAUDE.md: no duplicated
 business logic)."""

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Any
+
+# A source's ``kind`` describes which ports it satisfies, so the UI/admin can
+# tell an indexed folder from a searchable fetch-source. A backend may be both.
+KIND_INDEXABLE = "indexable"  # enumerates files already on disk (local folder)
+KIND_FETCH = "fetch"  # searches + downloads from an external service (YTM, …)


@dataclass(frozen=True, slots=True)
@@ -20,7 +26,7 @@ class SourceInfo:

    name: str
    label: str
-    kind: str  # "indexable" (more kinds — search/download — arrive with youtube)
+    kind: str  # KIND_INDEXABLE | KIND_FETCH
    available: bool


@@ -37,3 +43,53 @@ class SourceFile:
    suggested_title: str
    file_format: str
    file_size: int
+
+
+@dataclass(frozen=True, slots=True)
+class SearchResult:
+    """One hit from a searchable source (plan §5), shown on the discover screen.
+
+    ``source_id`` is the stable handle the same backend later resolves in
+    ``fetch`` — it must round-trip a download request without re-searching.
+    ``raw`` carries the backend's untouched payload for debugging / future use.
+    """
+
+    source: str
+    source_id: str
+    title: str
+    artist: str | None
+    album: str | None
+    duration_seconds: int | None
+    thumbnail_url: str | None
+    raw: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass(frozen=True, slots=True)
+class RawMetadata:
+    """Metadata a fetch-source can offer about an item *before* enrichment.
+
+    Best-effort and source-shaped — the canonical metadata still comes from the
+    enrichment pipeline (plan §6.2). Used to seed a more useful provisional
+    title than a bare id while a download is queued."""
+
+    title: str | None
+    artist: str | None
+    album: str | None
+    year: int | None
+    extra: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass(frozen=True, slots=True)
+class DownloadResult:
+    """A file a fetch-source produced on local disk (plan §5).
+
+    ``path`` is a temp file the caller owns: it is stored into managed storage
+    and then removed (same lifecycle as an upload). ``source_id`` is echoed back
+    because some backends only learn the canonical id during the download."""
+
+    source_id: str
+    path: Path
+    file_format: str
+    file_size: int
+    bitrate: int | None
+    suggested_title: str
@@ -35,3 +35,9 @@ class DownloadJobModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    progress: Mapped[float] = mapped_column(Float, nullable=False, default=0.0)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    retry_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    # Set once the download finishes and the track is imported — lets the UI
+    # link a completed job to its library track.
+    track_id: Mapped[uuid.UUID | None] = mapped_column(
+        ForeignKey("tracks.id", ondelete="SET NULL"),
+        nullable=True,
+    )
@@ -2,6 +2,9 @@

 from app.infrastructure.db.repositories.album_repository import SqlAlchemyAlbumRepository
 from app.infrastructure.db.repositories.artist_repository import SqlAlchemyArtistRepository
+from app.infrastructure.db.repositories.download_job_repository import (
+    SqlAlchemyDownloadJobRepository,
+)
 from app.infrastructure.db.repositories.history_repository import SqlAlchemyHistoryRepository
 from app.infrastructure.db.repositories.like_repository import SqlAlchemyLikeRepository
 from app.infrastructure.db.repositories.playlist_repository import SqlAlchemyPlaylistRepository
@@ -14,6 +17,7 @@ from app.infrastructure.db.repositories.user_repository import SqlAlchemyUserRep
 __all__ = [
    "SqlAlchemyAlbumRepository",
    "SqlAlchemyArtistRepository",
+    "SqlAlchemyDownloadJobRepository",
    "SqlAlchemyHistoryRepository",
    "SqlAlchemyLikeRepository",
    "SqlAlchemyPlaylistRepository",
@@ -0,0 +1,164 @@
+"""Download job repository — adapter over ``AsyncSession`` (plan §6.1)."""
+
+import datetime as dt
+import uuid
+
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.domain.entities.download import DownloadJob
+from app.infrastructure.db.models.download_job import DownloadJobModel
+from app.infrastructure.db.models.enums import DownloadStatus
+
+# Jobs that are not yet finished — used to dedup an in-flight download.
+_ACTIVE_STATUSES = (
+    DownloadStatus.QUEUED.value,
+    DownloadStatus.DOWNLOADING.value,
+    DownloadStatus.ENRICHING.value,
+)
+
+
+def _to_entity(row: DownloadJobModel) -> DownloadJob:
+    return DownloadJob(
+        id=row.id,
+        source=row.source,
+        source_id=row.source_id,
+        query=row.query,
+        requested_by=row.requested_by,
+        status=row.status,
+        progress=row.progress,
+        error_message=row.error_message,
+        retry_count=row.retry_count,
+        track_id=row.track_id,
+        created_at=row.created_at,
+        updated_at=row.updated_at,
+    )
+
+
+class SqlAlchemyDownloadJobRepository:
+    def __init__(self, session: AsyncSession) -> None:
+        self._session = session
+
+    async def add(
+        self,
+        *,
+        source: str,
+        source_id: str | None,
+        query: str | None,
+        requested_by: uuid.UUID | None,
+    ) -> DownloadJob:
+        row = DownloadJobModel(
+            source=source,
+            source_id=source_id,
+            query=query,
+            requested_by=requested_by,
+            status=DownloadStatus.QUEUED.value,
+            progress=0.0,
+            retry_count=0,
+        )
+        self._session.add(row)
+        await self._session.flush()
+        await self._session.refresh(row)
+        return _to_entity(row)
+
+    async def get_by_id(self, job_id: uuid.UUID) -> DownloadJob | None:
+        row = await self._session.get(DownloadJobModel, job_id)
+        return _to_entity(row) if row is not None else None
+
+    async def get_active_for_source(self, source: str, source_id: str) -> DownloadJob | None:
+        row = (
+            await self._session.execute(
+                select(DownloadJobModel)
+                .where(
+                    DownloadJobModel.source == source,
+                    DownloadJobModel.source_id == source_id,
+                    DownloadJobModel.status.in_(_ACTIVE_STATUSES),
+                )
+                .order_by(DownloadJobModel.created_at.desc())
+                .limit(1)
+            )
+        ).scalar_one_or_none()
+        return _to_entity(row) if row is not None else None
+
+    async def list(
+        self,
+        *,
+        requested_by: uuid.UUID | None,
+        status: str | None,
+        limit: int,
+        offset: int,
+    ) -> list[DownloadJob]:
+        stmt = select(DownloadJobModel)
+        if requested_by is not None:
+            stmt = stmt.where(DownloadJobModel.requested_by == requested_by)
+        if status is not None:
+            stmt = stmt.where(DownloadJobModel.status == status)
+        stmt = stmt.order_by(DownloadJobModel.created_at.desc()).limit(limit).offset(offset)
+        rows = (await self._session.execute(stmt)).scalars().all()
+        return [_to_entity(r) for r in rows]
+
+    async def count(self, *, requested_by: uuid.UUID | None, status: str | None) -> int:
+        stmt = select(func.count()).select_from(DownloadJobModel)
+        if requested_by is not None:
+            stmt = stmt.where(DownloadJobModel.requested_by == requested_by)
+        if status is not None:
+            stmt = stmt.where(DownloadJobModel.status == status)
+        return (await self._session.execute(stmt)).scalar_one()
+
+    async def set_status(
+        self,
+        job_id: uuid.UUID,
+        *,
+        status: str,
+        error_message: str | None = None,
+        track_id: uuid.UUID | None = None,
+    ) -> None:
+        row = await self._session.get(DownloadJobModel, job_id)
+        if row is None:
+            return
+        row.status = status
+        # ``error_message`` is always written: a successful transition clears a
+        # stale reason from an earlier failed attempt.
+        row.error_message = error_message
+        if track_id is not None:
+            row.track_id = track_id
+        if status == DownloadStatus.DONE.value:
+            row.progress = 1.0
+        await self._session.flush()
+
+    async def set_progress(self, job_id: uuid.UUID, progress: float) -> None:
+        row = await self._session.get(DownloadJobModel, job_id)
+        if row is None:
+            return
+        row.progress = max(0.0, min(1.0, progress))
+        await self._session.flush()
+
+    async def increment_retry(self, job_id: uuid.UUID) -> int:
+        row = await self._session.get(DownloadJobModel, job_id)
+        if row is None:
+            return 0
+        row.retry_count += 1
+        await self._session.flush()
+        return row.retry_count
+
+    async def delete(self, job_id: uuid.UUID) -> None:
+        row = await self._session.get(DownloadJobModel, job_id)
+        if row is not None:
+            await self._session.delete(row)
+            await self._session.flush()
+
+    async def failure_rate(self, source: str, *, since: dt.datetime) -> float:
+        total, failed = (
+            await self._session.execute(
+                select(
+                    func.count(),
+                    func.count().filter(DownloadJobModel.status == DownloadStatus.FAILED.value),
+                )
+                .select_from(DownloadJobModel)
+                .where(
+                    DownloadJobModel.source == source,
+                    DownloadJobModel.created_at >= since,
+                )
+            )
+        ).one()
+        return (failed / total) if total else 0.0
@@ -78,7 +78,7 @@ class AcoustIdHttpClient:
                )
            resp.raise_for_status()
            return resp.json()  # type: ignore[no-any-return]
-        except (httpx.HTTPError, ValueError):
+        except httpx.HTTPError, ValueError:
            log.warning("acoustid_lookup_failed")
            return None

@@ -2,16 +2,18 @@

 Built from settings at the composition root. Only sources that are configured
 are registered (e.g. ``local`` appears only when ``LOCAL_MEDIA_IMPORT_PATH`` is
-set), so enumeration reflects what the instance can actually use.
+set; ``youtube`` only when ``YOUTUBE_ENABLED``), so enumeration reflects what the
+instance can actually use.
 """

 from typing import cast

 from app.core.config import Settings
 from app.domain.errors import NotFoundError, ValidationError
-from app.domain.ports import IndexableSource, SourceBackend
+from app.domain.ports import FetchableSource, IndexableSource, SearchableSource, SourceBackend
 from app.domain.sources import SourceInfo
 from app.infrastructure.sources.local_folder import LocalFolderSource
+from app.infrastructure.sources.youtube import YouTubeMusicSource


 class SourceRegistry:
@@ -30,6 +32,22 @@ class SourceRegistry:
            raise ValidationError(f"Source {name!r} cannot be indexed.")
        return cast(IndexableSource, backend)

+    def searchable(self, name: str) -> SearchableSource:
+        backend = self.get(name)
+        if not hasattr(backend, "search"):
+            raise ValidationError(f"Source {name!r} cannot be searched.")
+        return cast(SearchableSource, backend)
+
+    def fetchable(self, name: str) -> FetchableSource:
+        backend = self.get(name)
+        if not hasattr(backend, "fetch"):
+            raise ValidationError(f"Source {name!r} cannot download.")
+        return cast(FetchableSource, backend)
+
+    def searchables(self) -> list[SearchableSource]:
+        """Every registered source that supports search (for cross-source search)."""
+        return [cast(SearchableSource, b) for b in self._by_name.values() if hasattr(b, "search")]
+
    def infos(self) -> list[SourceInfo]:
        return [backend.info() for backend in self._by_name.values()]

@@ -38,4 +56,11 @@ def build_source_registry(settings: Settings) -> SourceRegistry:
    backends: list[SourceBackend] = []
    if settings.local_media_import_path is not None:
        backends.append(LocalFolderSource(settings.local_media_import_path))
+    if settings.youtube_enabled:
+        backends.append(
+            YouTubeMusicSource(
+                cookies_path=settings.youtube_cookies_path,
+                tmp_dir=settings.upload_tmp_dir,
+            )
+        )
    return SourceRegistry(backends)
@@ -0,0 +1,207 @@
+"""``youtube`` source — YouTube Music search + download (plan §5).
+
+A *fetch* source: it searches YouTube Music (via ``ytmusicapi``, which returns
+clean song/artist/album/duration rows) and downloads the chosen item with
+``yt-dlp``. The two libraries are synchronous, so every call is bounced to a
+worker thread (``anyio.to_thread``); the sync yt-dlp progress hook bridges back
+to the async progress callback via ``anyio.from_thread``.
+
+Both libraries are optional dependencies — if either is missing the source is
+simply *unavailable* (it never crashes import or the registry; graceful
+degradation per CLAUDE.md). The audio stream is stored **as-is** (YouTube serves
+lossy Opus/AAC; re-encoding would be lossy→lossy, plan §6.6).
+
+``source_id`` is the YouTube ``videoId`` — stable, so a re-download of the same
+id is idempotent and dedups against an existing track.
+"""
+
+import functools
+import tempfile
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any
+
+import anyio
+
+from app.core.logging import get_logger
+from app.domain.ports import ProgressCallback
+from app.domain.sources import (
+    KIND_FETCH,
+    DownloadResult,
+    RawMetadata,
+    SearchResult,
+    SourceInfo,
+)
+from app.infrastructure.db.models.enums import TrackSource
+
+log = get_logger(__name__)
+
+# Functions a caller may inject for testing (defaults do the real library work).
+SearchFn = Callable[[str, int], list[dict[str, Any]]]
+# (video_id, tmp_dir, progress_hook, cookies_path) -> normalized download dict
+DownloadFn = Callable[[str, Path, Callable[[dict[str, Any]], None], Path | None], dict[str, Any]]
+
+
+def _libs_available() -> bool:
+    try:
+        import yt_dlp  # noqa: F401
+        import ytmusicapi  # noqa: F401
+    except ImportError:
+        return False
+    return True
+
+
+def _watch_url(video_id: str) -> str:
+    return f"https://music.youtube.com/watch?v={video_id}"
+
+
+class YouTubeMusicSource:
+    """Implements :class:`app.domain.ports.SearchableSource` and
+    :class:`~app.domain.ports.FetchableSource`."""
+
+    name = TrackSource.YOUTUBE.value
+
+    def __init__(
+        self,
+        *,
+        cookies_path: Path | None = None,
+        tmp_dir: Path | None = None,
+        search_fn: SearchFn | None = None,
+        download_fn: DownloadFn | None = None,
+    ) -> None:
+        self._cookies_path = cookies_path
+        self._tmp_dir = tmp_dir
+        self._search_fn = search_fn or _default_search
+        self._download_fn = download_fn or _default_download
+        # Only the real library path needs the deps; an injected fn is self-contained.
+        self._injected = search_fn is not None or download_fn is not None
+
+    def info(self) -> SourceInfo:
+        return SourceInfo(
+            name=self.name,
+            label="YouTube Music",
+            kind=KIND_FETCH,
+            available=self.is_available(),
+        )
+
+    def is_available(self) -> bool:
+        return True if self._injected else _libs_available()
+
+    async def search(self, query: str, *, limit: int) -> list[SearchResult]:
+        query = query.strip()
+        if not query:
+            return []
+        try:
+            rows = await anyio.to_thread.run_sync(functools.partial(self._search_fn, query, limit))
+        except Exception:
+            # No results / service down → degrade to empty (plan §5, CLAUDE.md).
+            log.warning("ytm_search_failed", query=query)
+            return []
+        return [r for r in (self._to_result(row) for row in rows) if r is not None]
+
+    async def fetch(
+        self, source_id: str, *, on_progress: ProgressCallback | None = None
+    ) -> DownloadResult:
+        tmp_dir = self._tmp_dir or Path(tempfile.gettempdir())
+
+        def hook(d: dict[str, Any]) -> None:
+            if on_progress is None or d.get("status") != "downloading":
+                return
+            total = d.get("total_bytes") or d.get("total_bytes_estimate")
+            done = d.get("downloaded_bytes")
+            if not total or done is None:
+                return
+            # Cap below 1.0 — the job only reaches 1.0 once stored + imported.
+            frac = min(done / total, 0.99)
+            # Bridge sync hook (worker thread) → async callback (event loop).
+            anyio.from_thread.run(on_progress, frac)
+
+        def _run() -> dict[str, Any]:
+            return self._download_fn(source_id, tmp_dir, hook, self._cookies_path)
+
+        info = await anyio.to_thread.run_sync(_run)
+        path = Path(info["filepath"])
+        stat = await anyio.Path(path).stat()
+        return DownloadResult(
+            source_id=source_id,
+            path=path,
+            file_format=info["file_format"],
+            file_size=stat.st_size,
+            bitrate=info.get("bitrate"),
+            suggested_title=info.get("title") or source_id,
+        )
+
+    async def get_metadata(self, source_id: str) -> RawMetadata | None:
+        # The search result already carries a usable title/artist, and the
+        # canonical metadata comes from enrichment (§6.2). A dedicated lookup is
+        # an optional refinement — skipped for now (returns None gracefully).
+        return None
+
+    def _to_result(self, row: dict[str, Any]) -> SearchResult | None:
+        video_id = row.get("videoId")
+        if not video_id:
+            return None  # non-playable row (e.g. a video without audio id)
+        artists = row.get("artists") or []
+        artist = ", ".join(a["name"] for a in artists if a.get("name")) or None
+        album = (row.get("album") or {}).get("name") if isinstance(row.get("album"), dict) else None
+        thumbnails = row.get("thumbnails") or []
+        thumbnail = thumbnails[-1].get("url") if thumbnails else None
+        return SearchResult(
+            source=self.name,
+            source_id=str(video_id),
+            title=row.get("title") or "Unknown",
+            artist=artist,
+            album=album,
+            duration_seconds=row.get("duration_seconds"),
+            thumbnail_url=thumbnail,
+            raw=row,
+        )
+
+
+def _default_search(query: str, limit: int) -> list[dict[str, Any]]:
+    """Real ytmusicapi search (songs only). Runs in a worker thread."""
+    from ytmusicapi import YTMusic
+
+    yt = YTMusic()  # unauthenticated: public search needs no login
+    results: list[dict[str, Any]] = yt.search(query, filter="songs", limit=limit)
+    return results[:limit]
+
+
+def _default_download(
+    video_id: str,
+    tmp_dir: Path,
+    progress_hook: Callable[[dict[str, Any]], None],
+    cookies_path: Path | None,
+) -> dict[str, Any]:
+    """Real yt-dlp download of the best audio stream. Runs in a worker thread.
+
+    Stores the original stream (no transcode — plan §6.3/§6.6). Returns a
+    normalized dict the adapter maps to :class:`DownloadResult`.
+    """
+    from yt_dlp import YoutubeDL
+
+    opts: dict[str, Any] = {
+        "format": "bestaudio/best",
+        "outtmpl": str(tmp_dir / "%(id)s.%(ext)s"),
+        "quiet": True,
+        "no_warnings": True,
+        "noprogress": True,
+        "progress_hooks": [progress_hook],
+    }
+    # Use cookies only when the file is actually present: the path can be set
+    # unconditionally (e.g. a mounted volume that may be empty) and downloads
+    # still work without it — cookies just unlock age/region-restricted items.
+    if cookies_path is not None and cookies_path.is_file():
+        opts["cookiefile"] = str(cookies_path)
+
+    with YoutubeDL(opts) as ydl:
+        info = ydl.extract_info(_watch_url(video_id), download=True)
+        filepath = Path(ydl.prepare_filename(info))
+
+    abr = info.get("abr")
+    return {
+        "filepath": filepath,
+        "file_format": filepath.suffix.lstrip(".").lower() or "m4a",
+        "bitrate": int(abr) if abr else None,
+        "title": info.get("title"),
+    }
@@ -1,7 +1,6 @@
 """arq worker settings — the queue runtime. Task functions register here.

 Run with: ``arq app.workers.arq_worker.WorkerSettings``.
-Tasks (download, transcode) are appended to ``functions`` in later steps.
 """

 from typing import Any, ClassVar
@@ -10,6 +9,7 @@ from arq.connections import RedisSettings

 from app.core.config import get_settings
 from app.core.logging import configure_logging, get_logger
+from app.workers.tasks.download_task import download_track
 from app.workers.tasks.enrich_task import enrich_track
 from app.workers.tasks.import_task import scan_local_folder

@@ -27,7 +27,7 @@ async def shutdown(_ctx: dict[str, Any]) -> None:


 class WorkerSettings:
-    functions: ClassVar[list[Any]] = [scan_local_folder, enrich_track]
+    functions: ClassVar[list[Any]] = [scan_local_folder, enrich_track, download_track]
    on_startup = startup
    on_shutdown = shutdown
    max_jobs = get_settings().max_parallel_downloads
@@ -34,6 +34,20 @@ async def enqueue(function: str, **kwargs: Any) -> str:
    return str(job.job_id)


+async def enqueue_download(job_id: uuid.UUID) -> None:
+    """Best-effort enqueue of a download job for the worker.
+
+    The job row is already persisted as ``queued``, so this is a follow-up, not a
+    barrier: if the queue is unreachable we log and move on (graceful
+    degradation) — the job stays ``queued`` and can be retried later. Deferred a
+    few seconds so the request's DB transaction commits before the worker reads
+    the row (same reason as :func:`enqueue_enrich`)."""
+    try:
+        await enqueue("download_track", job_id=str(job_id), _defer_by=3)
+    except DependencyUnavailableError:
+        log.warning("download_enqueue_failed", job_id=str(job_id))
+
+
 async def enqueue_enrich(track_id: uuid.UUID) -> None:
    """Best-effort enqueue of metadata enrichment for a freshly stored track.

@@ -0,0 +1,151 @@
+"""arq task: download one queued job through a fetch source (plan §6.1).
+
+Flow: load job → ``downloading`` → ``backend.fetch`` (progress streamed to the
+job row) → ``enriching`` → store file + minimal track → ``done`` → enqueue
+enrichment. yt-dlp fails often, so a failed fetch retries with exponential
+backoff (``download_max_retries``); only after the last try is the job marked
+``failed`` with a reason for the §A5 download manager.
+
+Heavy I/O belongs off the request cycle (CLAUDE.md); the HTTP endpoint only
+enqueues. The job row tolerates being deleted mid-flight (cancellation) — status
+writes against a missing row are no-ops.
+"""
+
+import uuid
+from typing import Any
+
+from arq import Retry
+
+from app.application.download_service import DownloadService
+from app.core.config import get_settings
+from app.core.logging import correlation_id, get_logger
+from app.domain.entities.download import DownloadJob
+from app.domain.errors import NotFoundError, ValidationError
+from app.domain.ports import FetchableSource, ProgressCallback
+from app.domain.sources import DownloadResult
+from app.infrastructure.db import session_scope
+from app.infrastructure.db.repositories import (
+    SqlAlchemyArtistRepository,
+    SqlAlchemyDownloadJobRepository,
+    SqlAlchemyTrackRepository,
+)
+from app.infrastructure.sources.registry import build_source_registry
+from app.infrastructure.storage.provider import get_file_storage
+from app.workers.queue import enqueue_enrich
+
+log = get_logger("worker.download")
+
+# Exponential backoff between retries: 30s, 60s, 120s … capped.
+_BACKOFF_BASE_SECONDS = 30
+_BACKOFF_MAX_SECONDS = 600
+# Only write progress when it advances by at least this much (avoid hammering
+# the DB on every yt-dlp chunk).
+_PROGRESS_STEP = 0.01
+
+
+async def download_track(_ctx: dict[str, Any], *, job_id: str) -> dict[str, Any]:
+    correlation_id.set(f"dl:{job_id}")
+    jid = uuid.UUID(job_id)
+    settings = get_settings()
+
+    job = await _load_job(jid)
+    if job is None:
+        log.info("download_job_missing", job_id=job_id)  # cancelled before pickup
+        return {"job_id": job_id, "status": "missing"}
+
+    registry = build_source_registry(settings)
+    try:
+        backend = registry.fetchable(job.source)
+    except (NotFoundError, ValidationError) as exc:
+        await _mark_failed(jid, f"Source unavailable: {exc}")
+        return {"job_id": job_id, "status": "failed"}
+    if job.source_id is None:
+        await _mark_failed(jid, "Job has no source_id to download.")
+        return {"job_id": job_id, "status": "failed"}
+
+    await _set_status(jid, "downloading")
+    try:
+        result = await _run_fetch(backend, job.source_id, jid)
+    except Exception as exc:
+        return await _handle_failure(jid, exc, settings.download_max_retries, job_id)
+
+    try:
+        track_id = await _import_result(jid, job, result)
+    except Exception as exc:
+        log.exception("download_import_failed", job_id=job_id)
+        await _mark_failed(jid, f"Import failed: {type(exc).__name__}: {exc}")
+        return {"job_id": job_id, "status": "failed"}
+
+    await enqueue_enrich(track_id)
+    log.info("download_complete", job_id=job_id, track_id=str(track_id))
+    return {"job_id": job_id, "status": "done", "track_id": str(track_id)}
+
+
+async def _run_fetch(
+    backend: FetchableSource, source_id: str, jid: uuid.UUID
+) -> DownloadResult:
+    """Fetch the file, streaming progress into the job row. A single session is
+    held for the download so progress writes don't churn connections; each
+    throttled update is committed so API pollers see it."""
+    async with session_scope() as session:
+        repo = SqlAlchemyDownloadJobRepository(session)
+        last = 0.0
+
+        async def on_progress(frac: float) -> None:
+            nonlocal last
+            if frac - last < _PROGRESS_STEP:
+                return
+            last = frac
+            await repo.set_progress(jid, frac)
+            await session.commit()
+
+        cb: ProgressCallback = on_progress
+        return await backend.fetch(source_id, on_progress=cb)
+
+
+async def _import_result(jid: uuid.UUID, job: DownloadJob, result: DownloadResult) -> uuid.UUID:
+    async with session_scope() as session:
+        repo = SqlAlchemyDownloadJobRepository(session)
+        await repo.set_status(jid, status="enriching")
+        service = DownloadService(
+            jobs=repo,
+            tracks=SqlAlchemyTrackRepository(session),
+            artists=SqlAlchemyArtistRepository(session),
+            storage=get_file_storage(),
+        )
+        track_id = await service.store_result(
+            source=job.source, result=result, requested_by=job.requested_by
+        )
+        await repo.set_status(jid, status="done", track_id=track_id)
+    return track_id
+
+
+async def _handle_failure(
+    jid: uuid.UUID, exc: Exception, max_retries: int, job_id: str
+) -> dict[str, Any]:
+    async with session_scope() as session:
+        tries = await SqlAlchemyDownloadJobRepository(session).increment_retry(jid)
+    if tries <= max_retries:
+        backoff = min(_BACKOFF_BASE_SECONDS * 2 ** (tries - 1), _BACKOFF_MAX_SECONDS)
+        log.warning("download_retry", job_id=job_id, attempt=tries, defer=backoff)
+        raise Retry(defer=backoff) from exc
+    log.exception("download_failed", job_id=job_id)
+    await _mark_failed(jid, f"Download failed after {tries} attempts: {type(exc).__name__}: {exc}")
+    return {"job_id": job_id, "status": "failed"}
+
+
+async def _load_job(jid: uuid.UUID) -> DownloadJob | None:
+    async with session_scope() as session:
+        return await SqlAlchemyDownloadJobRepository(session).get_by_id(jid)
+
+
+async def _set_status(jid: uuid.UUID, status: str) -> None:
+    async with session_scope() as session:
+        await SqlAlchemyDownloadJobRepository(session).set_status(jid, status=status)
+
+
+async def _mark_failed(jid: uuid.UUID, error: str) -> None:
+    async with session_scope() as session:
+        await SqlAlchemyDownloadJobRepository(session).set_status(
+            jid, status="failed", error_message=error
+        )