feat(sources): YouTube Music search + download pipeline (§1C/§1E)

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
@@ -0,0 +1,183 @@
+"""DownloadService — request external downloads and import their results.
+
+Two roles (plan §6.1):
+
+* **Request side** (HTTP): validate + dedup a download request, create a
+  ``queued`` job, and enqueue the worker. Dedup is on ``(source, source_id)``
+  against both the library (already imported) and in-flight jobs (a double-click
+  must not queue twice) — idempotency per CLAUDE.md.
+* **Worker side**: ``store_result`` turns a backend's :class:`DownloadResult`
+  into a managed file + minimal ``pending`` track (sibling of
+  :class:`~app.application.import_service.LibraryImportService`); enrichment
+  (§6.2) fills the rest.
+
+The fingerprint-level dedup (a different id that turns out to be the same audio)
+happens later in enrichment, where the fingerprint is computed.
+"""
+
+import contextlib
+import uuid
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+
+import anyio
+
+from app.core.logging import get_logger
+from app.domain.entities.download import DownloadJob
+from app.domain.errors import NotFoundError, ValidationError
+from app.domain.ports import (
+    ArtistRepository,
+    DownloadJobRepository,
+    FileStorage,
+    TrackRepository,
+)
+from app.domain.sources import DownloadResult
+
+# Logger obtained for this module's name.
+log = get_logger(__name__)
+
+# Artist name attached to every freshly downloaded track by ``store_result``;
+# per the module docstring, enrichment fills in the real metadata later.
+_UNKNOWN_ARTIST = "Unknown Artist"
+
+# (job_id) -> None — enqueue the download worker, deferred so the job row is
+# committed before the worker reads it (same pattern as enrich).
+DownloadEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
+# (uuid) -> None — enqueue enrichment. Presumably keyed by the track id that
+# ``store_result`` returns — TODO confirm against the enrich worker.
+EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
+
+
+@dataclass(frozen=True)
+class DownloadRequest:
+    """Outcome of asking for a download.
+
+    As produced by ``DownloadService.request``, exactly one of two states
+    holds:
+
+    * the item is already in the library — ``track_id`` is set,
+      ``already_in_library`` is ``True`` and ``job`` is ``None``;
+    * a job already covers it or was just created — ``job`` is set,
+      ``track_id`` is ``None`` and ``already_in_library`` is ``False``, so the
+      UI can route to the download manager.
+
+    The dataclass itself does not enforce this invariant.
+    """
+
+    # Existing or newly created job; None when the item is already imported.
+    job: DownloadJob | None
+    # Id of the existing library track; None when a job is returned instead.
+    track_id: uuid.UUID | None
+    # True only when the request was satisfied by an existing library track.
+    already_in_library: bool
+
+
+class DownloadService:
+    """Application service for external downloads.
+
+    Request-side methods (``request``, ``list``, ``get``, ``cancel``,
+    ``retry``) manage download-job rows and optionally enqueue the download
+    worker. The worker-side ``store_result`` turns a finished download into a
+    stored file plus a minimal ``pending`` track.
+    """
+
+    def __init__(
+        self,
+        *,
+        jobs: DownloadJobRepository,
+        tracks: TrackRepository,
+        artists: ArtistRepository,
+        storage: FileStorage,
+        enqueue_download: DownloadEnqueuer | None = None,
+        enqueue_enrich: EnrichEnqueuer | None = None,
+    ) -> None:
+        """Wire the service to its repositories, storage and enqueuers.
+
+        Both enqueuers are optional. When ``enqueue_download`` is ``None``,
+        jobs are created / re-queued without notifying a worker.
+        ``enqueue_enrich`` is only stored here; none of the methods below
+        invoke it.
+        """
+        self._jobs = jobs
+        self._tracks = tracks
+        self._artists = artists
+        self._storage = storage
+        self._enqueue_download = enqueue_download
+        self._enqueue_enrich = enqueue_enrich
+
+    # -- request side ---------------------------------------------------------
+    async def request(
+        self,
+        *,
+        source: str,
+        source_id: str,
+        query: str | None,
+        requested_by: uuid.UUID | None,
+    ) -> DownloadRequest:
+        """Ask for ``(source, source_id)`` to be downloaded, deduplicating first.
+
+        Resolution order:
+
+        1. a track with this source/id already exists: report it, no job;
+        2. an active job already covers it: return that job;
+        3. otherwise create a new job and, when a download enqueuer is
+           configured, enqueue it.
+
+        Raises:
+            ValidationError: if ``source_id`` is empty after stripping
+                surrounding whitespace.
+        """
+        source_id = source_id.strip()
+        if not source_id:
+            raise ValidationError("A source_id is required to download.")
+
+        # Already imported: nothing to download.
+        existing = await self._tracks.get_by_source(source, source_id)
+        if existing is not None:
+            return DownloadRequest(job=None, track_id=existing.id, already_in_library=True)
+
+        # An in-flight job covers it (e.g. a double-click): reuse that job.
+        active = await self._jobs.get_active_for_source(source, source_id)
+        if active is not None:
+            return DownloadRequest(job=active, track_id=None, already_in_library=False)
+
+        job = await self._jobs.add(
+            source=source,
+            source_id=source_id,
+            query=query,
+            requested_by=requested_by,
+        )
+        if self._enqueue_download is not None:
+            await self._enqueue_download(job.id)
+        return DownloadRequest(job=job, track_id=None, already_in_library=False)
+
+    # NOTE: this method shadows the builtin ``list`` inside the class body.
+    # Unless annotations are evaluated lazily, any method defined *after* it
+    # must not use a bare ``list[...]`` in its signature.
+    async def list(
+        self,
+        *,
+        requested_by: uuid.UUID | None,
+        status: str | None,
+        limit: int,
+        offset: int,
+    ) -> tuple[list[DownloadJob], int]:
+        """Return one page of jobs and the total count for the same filters.
+
+        ``limit`` / ``offset`` apply to the returned page only; the count is
+        computed with just the ``requested_by`` / ``status`` filters.
+        """
+        jobs = await self._jobs.list(
+            requested_by=requested_by, status=status, limit=limit, offset=offset
+        )
+        total = await self._jobs.count(requested_by=requested_by, status=status)
+        return jobs, total
+
+    async def get(self, job_id: uuid.UUID) -> DownloadJob:
+        """Return the job with ``job_id``.
+
+        Raises:
+            NotFoundError: if no such job exists.
+        """
+        job = await self._jobs.get_by_id(job_id)
+        if job is None:
+            raise NotFoundError(f"Download job {job_id} not found.")
+        return job
+
+    async def cancel(self, job_id: uuid.UUID) -> None:
+        """Remove the job record. True mid-flight cancellation of an in-progress
+        yt-dlp download is out of scope (MVP); the worker tolerates a vanished
+        job row (its status writes become no-ops).
+
+        Raises:
+            NotFoundError: if no such job exists.
+        """
+        # Reuse ``get`` so the existence check and error message stay in one place.
+        await self.get(job_id)
+        await self._jobs.delete(job_id)
+
+    async def retry(self, job_id: uuid.UUID) -> DownloadJob:
+        """Put a job back into the ``queued`` state and re-enqueue the worker.
+
+        The stored error message is cleared. Returns the job as re-read after
+        the update, or the pre-update snapshot if the row disappeared in
+        between.
+
+        NOTE(review): the current status is not checked, so retrying a job
+        that is still active would enqueue it a second time — confirm callers
+        only expose retry for failed jobs.
+
+        Raises:
+            NotFoundError: if no such job exists.
+        """
+        job = await self.get(job_id)
+        await self._jobs.set_status(job_id, status="queued", error_message=None)
+        if self._enqueue_download is not None:
+            await self._enqueue_download(job_id)
+        refreshed = await self._jobs.get_by_id(job_id)
+        return refreshed if refreshed is not None else job
+
+    # -- worker side ----------------------------------------------------------
+    async def store_result(
+        self,
+        *,
+        source: str,
+        result: DownloadResult,
+        requested_by: uuid.UUID | None,
+    ) -> uuid.UUID:
+        """Store a freshly downloaded file and create a minimal ``pending`` track.
+
+        Returns the new track id (the caller enqueues enrichment after commit).
+
+        Cleanup guarantees:
+
+        * the temp file produced by the backend is always removed
+          (best-effort), whether the import succeeds, fails or is cancelled;
+        * if anything goes wrong after the file was stored — including task
+          cancellation — the stored copy is deleted again so no orphaned
+          object is left without a track row. The original error is re-raised.
+        """
+        track_id = uuid.uuid4()
+        # Stored files are sharded by the first two characters of the track id.
+        key = f"tracks/{str(track_id)[:2]}/{track_id}.{result.file_format}"
+        try:
+            await self._storage.save_file(key, result.path)
+            try:
+                artist = await self._artists.get_or_create(_UNKNOWN_ARTIST)
+                await self._tracks.add(
+                    id=track_id,
+                    title=result.suggested_title,
+                    artist_id=artist.id,
+                    storage_uri=key,
+                    file_format=result.file_format,
+                    file_size=result.file_size,
+                    source=source,
+                    source_id=result.source_id,
+                    metadata_status="pending",
+                    added_by=requested_by,
+                )
+            except BaseException:
+                # BaseException rather than Exception: cancellation is not an
+                # ``Exception`` subclass but must roll the stored file back too.
+                # The shield lets the delete run even inside a cancelled scope.
+                with anyio.CancelScope(shield=True):
+                    try:
+                        await self._storage.delete(key)
+                    except Exception:
+                        # Best-effort: never mask the original error, but leave
+                        # a trace so the orphaned object can be found.
+                        log.warning(
+                            f"Could not remove orphaned stored file {key!r} "
+                            "after a failed import",
+                            exc_info=True,
+                        )
+                raise
+        finally:
+            # Shielded so the temp file is removed even when the task is
+            # being cancelled; failures here are deliberately ignored.
+            with anyio.CancelScope(shield=True), contextlib.suppress(Exception):
+                await anyio.Path(result.path).unlink(missing_ok=True)
+        return track_id