feat(sources): YouTube Music search + download pipeline (§1C/§1E)

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
@@ -2,6 +2,7 @@

 from app.domain.entities.album import Album
 from app.domain.entities.cover import CoverArt
+from app.domain.entities.download import DownloadJob
 from app.domain.entities.history import PlayHistoryEntry
 from app.domain.entities.like import Like
 from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
@@ -22,6 +23,7 @@ __all__ = [
    "CoverArt",
    "Credentials",
    "DiskUsage",
+    "DownloadJob",
    "Fingerprint",
    "FormatBreakdown",
    "LibraryStats",
@@ -0,0 +1,26 @@
+"""Download job domain entity (plan §6.1).
+
+A queued fetch from an external source, tracked through its lifecycle so the UI
+download manager (screen §A5) can show progress, errors, and retries. The
+``status`` strings mirror :class:`~app.infrastructure.db.models.enums.DownloadStatus`.
+"""
+
+import datetime as dt
+import uuid
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, slots=True)
+class DownloadJob:
+    id: uuid.UUID
+    source: str
+    source_id: str | None
+    query: str | None
+    requested_by: uuid.UUID | None
+    status: str
+    progress: float
+    error_message: str | None
+    retry_count: int
+    track_id: uuid.UUID | None
+    created_at: dt.datetime
+    updated_at: dt.datetime
@@ -7,7 +7,7 @@ are bound to these ports at the composition root (``app.api.deps``).

 import datetime as dt
 import uuid
-from collections.abc import AsyncIterator, Iterator
+from collections.abc import AsyncIterator, Awaitable, Callable, Iterator
 from contextlib import AbstractAsyncContextManager
 from pathlib import Path
 from typing import Protocol
@@ -18,6 +18,7 @@ from app.domain.entities import (
    CoverArt,
    Credentials,
    DiskUsage,
+    DownloadJob,
    Fingerprint,
    LibraryStats,
    Like,
@@ -29,9 +30,14 @@ from app.domain.entities import (
    User,
 )
 from app.domain.entities.track import Artist, Track
-from app.domain.sources import SourceFile, SourceInfo
+from app.domain.sources import DownloadResult, RawMetadata, SearchResult, SourceFile, SourceInfo
 from app.domain.tokens import IssuedToken, TokenClaims, TokenType

+# A fetch source reports download progress as a fraction in [0.0, 1.0]. It's a
+# plain callback (not a port) because it's an inversion of control supplied per
+# call by the worker, which persists each reported value to the download job.
+ProgressCallback = Callable[[float], Awaitable[None]]
+

 class UserRepository(Protocol):
    async def get_by_id(self, user_id: uuid.UUID) -> User | None: ...
@@ -275,6 +281,54 @@ class HistoryRepository(Protocol):
    async def count(self, *, user_id: uuid.UUID) -> int: ...


+class DownloadJobRepository(Protocol):
+    """Persistence for download jobs (plan §6.1). Drives the §A5 download manager
+    and the worker's retry/backoff loop."""
+
+    async def add(
+        self,
+        *,
+        source: str,
+        source_id: str | None,
+        query: str | None,
+        requested_by: uuid.UUID | None,
+    ) -> DownloadJob: ...
+    async def get_by_id(self, job_id: uuid.UUID) -> DownloadJob | None: ...
+    async def get_active_for_source(self, source: str, source_id: str) -> DownloadJob | None:
+        """An unfinished (queued/downloading/enriching) job for the same item, if
+        any — used to dedup before enqueuing so a double-click can't queue twice."""
+        ...
+
+    async def list(
+        self,
+        *,
+        requested_by: uuid.UUID | None,
+        status: str | None,
+        limit: int,
+        offset: int,
+    ) -> list[DownloadJob]: ...
+    async def count(self, *, requested_by: uuid.UUID | None, status: str | None) -> int: ...
+    async def set_status(
+        self,
+        job_id: uuid.UUID,
+        *,
+        status: str,
+        error_message: str | None = None,
+        track_id: uuid.UUID | None = None,
+    ) -> None: ...
+    async def set_progress(self, job_id: uuid.UUID, progress: float) -> None: ...
+    async def increment_retry(self, job_id: uuid.UUID) -> int:
+        """Bump ``retry_count`` and return the new value."""
+        ...
+
+    async def delete(self, job_id: uuid.UUID) -> None: ...
+    async def failure_rate(self, source: str, *, since: dt.datetime) -> float:
+        """Fraction of jobs for ``source`` created since ``since`` that ended
+        ``failed`` (0.0 when there are none) — drives the §A5 "source unhealthy"
+        banner."""
+        ...
+
+
 class SourceBackend(Protocol):
    """A registered source of tracks (mounted folder, YouTube, …).

@@ -293,6 +347,29 @@ class IndexableSource(SourceBackend, Protocol):
    def scan(self) -> Iterator[SourceFile]: ...


+class SearchableSource(SourceBackend, Protocol):
+    """A source that can be searched by free text (e.g. YouTube Music).
+
+    ``search`` returns ``[]`` (never raises) when nothing matches or the service
+    is down — the discover screen degrades to "nothing found" rather than erroring."""
+
+    async def search(self, query: str, *, limit: int) -> list[SearchResult]: ...
+
+
+class FetchableSource(SourceBackend, Protocol):
+    """A source that can download a previously-discovered item to local disk.
+
+    ``fetch`` resolves a ``source_id`` (from a :class:`SearchResult`) into a file
+    and reports progress through ``on_progress``. It runs only in a worker (heavy
+    I/O) and raises on failure so the download task can retry with backoff."""
+
+    async def fetch(
+        self, source_id: str, *, on_progress: ProgressCallback | None = None
+    ) -> DownloadResult: ...
+
+    async def get_metadata(self, source_id: str) -> RawMetadata | None: ...
+
+
 # -- metadata enrichment (plan §6.2) -----------------------------------------
 class AudioTagReader(Protocol):
    """Reads embedded tags from a local audio file. Returns ``None`` only when
@@ -10,8 +10,14 @@ here — a source yields a file plus a minimal title; enrichment (plan §6.2) fi
 the rest later, so this stays a thin discovery layer (CLAUDE.md: no duplicated
 business logic)."""

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Any
+
+# A source's ``kind`` describes which ports it satisfies, so the UI/admin can
+# tell an indexed folder from a searchable fetch-source. A backend may be both.
+KIND_INDEXABLE = "indexable"  # enumerates files already on disk (local folder)
+KIND_FETCH = "fetch"  # searches + downloads from an external service (YTM, …)


@dataclass(frozen=True, slots=True)
@@ -20,7 +26,7 @@ class SourceInfo:

    name: str
    label: str
-    kind: str  # "indexable" (more kinds — search/download — arrive with youtube)
+    kind: str  # KIND_INDEXABLE | KIND_FETCH
    available: bool


@@ -37,3 +43,53 @@ class SourceFile:
    suggested_title: str
    file_format: str
    file_size: int
+
+
+@dataclass(frozen=True, slots=True)
+class SearchResult:
+    """One hit from a searchable source (plan §5), shown on the discover screen.
+
+    ``source_id`` is the stable handle the same backend later resolves in
+    ``fetch`` — it must round-trip a download request without re-searching.
+    ``raw`` carries the backend's untouched payload for debugging / future use.
+    """
+
+    source: str
+    source_id: str
+    title: str
+    artist: str | None
+    album: str | None
+    duration_seconds: int | None
+    thumbnail_url: str | None
+    raw: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass(frozen=True, slots=True)
+class RawMetadata:
+    """Metadata a fetch-source can offer about an item *before* enrichment.
+
+    Best-effort and source-shaped — the canonical metadata still comes from the
+    enrichment pipeline (plan §6.2). Used to seed a more useful provisional
+    title than a bare id while a download is queued."""
+
+    title: str | None
+    artist: str | None
+    album: str | None
+    year: int | None
+    extra: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass(frozen=True, slots=True)
+class DownloadResult:
+    """A file a fetch-source produced on local disk (plan §5).
+
+    ``path`` is a temp file the caller owns: the caller stores it into managed
+    storage, then removes it (same lifecycle as an upload). ``source_id`` is echoed back
+    because some backends only learn the canonical id during the download."""
+
+    source_id: str
+    path: Path
+    file_format: str
+    file_size: int
+    bitrate: int | None
+    suggested_title: str