feat(sources): YouTube Music search + download pipeline (§1C/§1E)

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
@@ -15,6 +15,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.application.auth_service import AuthService
+from app.application.download_service import DownloadService
 from app.application.metadata_service import MetadataEnrichmentService
 from app.application.streaming_service import StreamingService
 from app.application.subsonic_auth_service import SubsonicAuthService
@@ -29,6 +30,7 @@ from app.infrastructure.db import get_sessionmaker
 from app.infrastructure.db.repositories import (
    SqlAlchemyAlbumRepository,
    SqlAlchemyArtistRepository,
+    SqlAlchemyDownloadJobRepository,
    SqlAlchemyHistoryRepository,
    SqlAlchemyLikeRepository,
    SqlAlchemyPlaylistRepository,
@@ -41,7 +43,7 @@ from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
 from app.infrastructure.metadata.tags import MutagenTagReader
 from app.infrastructure.sources.registry import SourceRegistry, build_source_registry
 from app.infrastructure.storage.provider import get_file_storage
-from app.workers.queue import enqueue_enrich
+from app.workers.queue import enqueue_download, enqueue_enrich


 async def get_session() -> AsyncIterator[AsyncSession]:
@@ -136,9 +138,7 @@ def get_streaming_service(session: SessionDep, storage: FileStorageDep) -> Strea
    )


-def get_metadata_service(
-    session: SessionDep, storage: FileStorageDep
-) -> MetadataEnrichmentService:
+def get_metadata_service(session: SessionDep, storage: FileStorageDep) -> MetadataEnrichmentService:
    """Wires the §6.2 fingerprint/AcoustID adapters for read-only, inline use
    (the metadata editor's "find matches" — §A7). The full pipeline (incl.
    cover art) stays in the worker (`tasks/enrich_task.py`)."""
@@ -161,9 +161,21 @@ def get_metadata_service(
    )


+def get_download_service(session: SessionDep, storage: FileStorageDep) -> DownloadService:
+    return DownloadService(
+        jobs=SqlAlchemyDownloadJobRepository(session),
+        tracks=SqlAlchemyTrackRepository(session),
+        artists=SqlAlchemyArtistRepository(session),
+        storage=storage,
+        enqueue_download=enqueue_download,
+        enqueue_enrich=enqueue_enrich,
+    )
+
+
 UploadServiceDep = Annotated[UploadService, Depends(get_upload_service)]
 StreamingServiceDep = Annotated[StreamingService, Depends(get_streaming_service)]
 MetadataServiceDep = Annotated[MetadataEnrichmentService, Depends(get_metadata_service)]
+DownloadServiceDep = Annotated[DownloadService, Depends(get_download_service)]


 # -- library repository deps ---------------------------------------------------
@@ -0,0 +1,59 @@
+"""Schemas for the download job endpoints (§A5 download manager)."""
+
+import datetime as dt
+import uuid
+
+from pydantic import BaseModel, Field
+
+from app.domain.entities.download import DownloadJob
+
+
+class DownloadCreate(BaseModel):
+    """Request to download an item discovered on a fetch source."""
+
+    source: str
+    source_id: str = Field(min_length=1)
+    # Optional free-text the result came from — stored for display only.
+    query: str | None = None
+
+
+class DownloadJobOut(BaseModel):
+    """API representation of a download job, copied field-for-field from the entity."""
+
+    id: uuid.UUID
+    source: str
+    source_id: str | None
+    query: str | None
+    status: str
+    progress: float
+    error_message: str | None
+    retry_count: int
+    track_id: uuid.UUID | None
+    created_at: dt.datetime
+    updated_at: dt.datetime
+
+    # The return annotation is quoted because ``DownloadJobOut`` is not bound until
+    # the class body has finished executing. Unquoted, it raises NameError at import
+    # on interpreters that evaluate annotations eagerly (this module has no
+    # ``from __future__ import annotations``).
+    @classmethod
+    def from_entity(cls, job: DownloadJob) -> "DownloadJobOut":
+        """Build the response model from a ``DownloadJob`` entity."""
+        return cls(
+            id=job.id,
+            source=job.source,
+            source_id=job.source_id,
+            query=job.query,
+            status=job.status,
+            progress=job.progress,
+            error_message=job.error_message,
+            retry_count=job.retry_count,
+            track_id=job.track_id,
+            created_at=job.created_at,
+            updated_at=job.updated_at,
+        )
+
+
+class DownloadCreateResponse(BaseModel):
+    """Result of requesting a download.
+
+    ``already_in_library`` → the item was already imported (``track_id`` set, no
+    job). Otherwise ``job`` describes the queued (or already in-flight) download.
+    """
+
+    already_in_library: bool
+    track_id: uuid.UUID | None
+    job: DownloadJobOut | None
@@ -0,0 +1,35 @@
+"""Schemas for searching external (fetch) sources — the §A4 discover screen."""
+
+from pydantic import BaseModel
+
+from app.domain.sources import SearchResult
+
+
+class ExternalSearchResultOut(BaseModel):
+    """API representation of one hit returned by an external source search."""
+
+    source: str
+    source_id: str
+    title: str
+    artist: str | None
+    album: str | None
+    duration_seconds: int | None
+    thumbnail_url: str | None
+
+    # The return annotation is quoted because ``ExternalSearchResultOut`` is not
+    # bound until the class body has finished executing. Unquoted, it raises
+    # NameError at import on interpreters that evaluate annotations eagerly (this
+    # module has no ``from __future__ import annotations``).
+    @classmethod
+    def from_entity(cls, r: SearchResult) -> "ExternalSearchResultOut":
+        """Build the response model from a ``SearchResult`` entity."""
+        return cls(
+            source=r.source,
+            source_id=r.source_id,
+            title=r.title,
+            artist=r.artist,
+            album=r.album,
+            duration_seconds=r.duration_seconds,
+            thumbnail_url=r.thumbnail_url,
+        )
+
+
+class ExternalSearchResponse(BaseModel):
+    """Flat list of hits across one or more searchable sources, plus the names of
+    the sources actually searched; unavailable ones are left out, so the UI can warn."""
+
+    results: list[ExternalSearchResultOut]
+    searched_sources: list[str]
@@ -1,36 +1,78 @@
-"""Download job endpoints. Heavy work is dispatched to arq workers."""
+"""Download job endpoints (§A5). Heavy work is dispatched to arq workers — these
+handlers only create/inspect/cancel/retry job records."""

 import uuid
-from typing import Any

-from fastapi import APIRouter
+from fastapi import APIRouter, Query, Response
+
+from app.api.deps import CurrentUser, DownloadServiceDep
+from app.api.schemas.download import DownloadCreate, DownloadCreateResponse, DownloadJobOut
+from app.api.schemas.pagination import PagedResponse

 router = APIRouter(prefix="/downloads", tags=["downloads"])


@router.get("")
-async def list_downloads() -> Any: ...
+async def list_downloads(
+    service: DownloadServiceDep,
+    user: CurrentUser,
+    status: str | None = Query(default=None),
+    mine: bool = Query(default=False),
+    limit: int = Query(50, ge=1, le=200),
+    offset: int = Query(0, ge=0),
+) -> PagedResponse[DownloadJobOut]:
+    jobs, total = await service.list(
+        requested_by=user.id if mine else None,
+        status=status,
+        limit=limit,
+        offset=offset,
+    )
+    return PagedResponse(
+        items=[DownloadJobOut.from_entity(j) for j in jobs],
+        total=total,
+        limit=limit,
+        offset=offset,
+    )


-@router.post("")
-async def create_download() -> Any: ...
+@router.post("", status_code=202)
+async def create_download(
+    body: DownloadCreate,
+    service: DownloadServiceDep,
+    user: CurrentUser,
+) -> DownloadCreateResponse:
+    result = await service.request(
+        source=body.source,
+        source_id=body.source_id,
+        query=body.query,
+        requested_by=user.id,
+    )
+    return DownloadCreateResponse(
+        already_in_library=result.already_in_library,
+        track_id=result.track_id,
+        job=DownloadJobOut.from_entity(result.job) if result.job is not None else None,
+    )


@router.get("/{job_id}")
-async def get_download(job_id: uuid.UUID) -> Any: ...
+async def get_download(
+    job_id: uuid.UUID, service: DownloadServiceDep, _: CurrentUser
+) -> DownloadJobOut:
+    job = await service.get(job_id)
+    return DownloadJobOut.from_entity(job)


-@router.delete("/{job_id}")
-async def cancel_download(job_id: uuid.UUID) -> Any: ...
+@router.delete("/{job_id}", status_code=204)
+async def cancel_download(
+    job_id: uuid.UUID, service: DownloadServiceDep, _: CurrentUser
+) -> Response:
+    await service.cancel(job_id)
+    return Response(status_code=204)


@router.post("/{job_id}/retry")
-async def retry_download(job_id: uuid.UUID) -> Any: ...
-
-
-@router.post("/pause")
-async def pause_downloads() -> Any: ...
-
-
-@router.post("/resume")
-async def resume_downloads() -> Any: ...
+async def retry_download(
+    job_id: uuid.UUID, service: DownloadServiceDep, _: CurrentUser
+) -> DownloadJobOut:
+    job = await service.retry(job_id)
+    return DownloadJobOut.from_entity(job)
@@ -1,12 +1,11 @@
 """Search endpoints: global and library-scoped."""

-from typing import Any
-
 from fastapi import APIRouter, Query

-from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, TrackRepoDep
+from app.api.deps import AlbumRepoDep, ArtistRepoDep, CurrentUser, SourceRegistryDep, TrackRepoDep
 from app.api.schemas.album import AlbumOut
 from app.api.schemas.artist import ArtistOut
+from app.api.schemas.external_search import ExternalSearchResponse, ExternalSearchResultOut
 from app.api.schemas.search import LibrarySearchResponse
 from app.api.schemas.track import TrackOut
 from app.api.v1.albums import _build_album_out
@@ -16,7 +15,26 @@ router = APIRouter(prefix="/search", tags=["search"])


@router.get("")
-async def search(_: CurrentUser) -> Any: ...
+async def search(
+    _: CurrentUser,
+    registry: SourceRegistryDep,
+    q: str = Query(min_length=1),
+    limit: int = Query(20, ge=1, le=50),
+) -> ExternalSearchResponse:
+    """Search every available fetch source and merge the hits (§A4 discover).
+
+    A source that reports itself unavailable is skipped rather than failing the
+    whole request (graceful degradation); only available sources are reported as
+    searched. An error raised by a source's own search call still propagates."""
+    results: list[ExternalSearchResultOut] = []
+    searched: list[str] = []
+    for backend in registry.searchables():
+        if not backend.is_available():
+            continue
+        searched.append(backend.name)
+        hits = await backend.search(q, limit=limit)
+        results.extend(ExternalSearchResultOut.from_entity(h) for h in hits)
+    return ExternalSearchResponse(results=results, searched_sources=searched)


@router.get("/library")
@@ -1,14 +1,13 @@
-"""External source endpoints: enumerate sources and trigger imports.
+"""External source endpoints: enumerate sources, search, and trigger imports.

-Listing/health are read-only (any authenticated user). Scanning a source is an
-admin action and runs in a worker — the endpoint only enqueues it.
+Listing/health/search are read-only (any authenticated user). Scanning a source
+is an admin action and runs in a worker — the endpoint only enqueues it.
 """

-from typing import Any
-
-from fastapi import APIRouter
+from fastapi import APIRouter, Query

 from app.api.deps import CurrentUser, SourceRegistryDep, SuperUser
+from app.api.schemas.external_search import ExternalSearchResponse, ExternalSearchResultOut
 from app.api.schemas.source import ScanResponse, SourceHealthOut, SourceInfoOut
 from app.domain.errors import DependencyUnavailableError
 from app.workers.queue import enqueue
@@ -39,6 +38,18 @@ async def source_health(


@router.get("/{source}/search")
-async def search_source(source: str, _: CurrentUser) -> Any:
-    # Search is for fetch-style sources (youtube, …) — not yet implemented.
-    ...
+async def search_source(
+    source: str,
+    _: CurrentUser,
+    registry: SourceRegistryDep,
+    q: str = Query(min_length=1),
+    limit: int = Query(20, ge=1, le=50),
+) -> ExternalSearchResponse:
+    backend = registry.searchable(source)  # 404 if unknown, 422 if not searchable
+    if not backend.is_available():
+        raise DependencyUnavailableError(f"Source {source!r} is not available.")
+    results = await backend.search(q, limit=limit)
+    return ExternalSearchResponse(
+        results=[ExternalSearchResultOut.from_entity(r) for r in results],
+        searched_sources=[source],
+    )
@@ -63,8 +63,7 @@ async def get_storage_stats(
        by_metadata_status=stats.by_metadata_status,
        by_source=stats.by_source,
        top_genres=[
-            GenreCountOut(genre=genre, track_count=count)
-            for genre, count in genres[:_TOP_GENRES]
+            GenreCountOut(genre=genre, track_count=count) for genre, count in genres[:_TOP_GENRES]
        ],
        disk=DiskUsageOut(total=disk.total, used=disk.used, free=disk.free) if disk else None,
    )
@@ -87,9 +87,7 @@ async def list_tracks(
        limit=limit,
        offset=offset,
    )
-    total = await track_repo.count(
-        artist_id=artist_id, album_id=album_id, q=q, source=source
-    )
+    total = await track_repo.count(artist_id=artist_id, album_id=album_id, q=q, source=source)

    artist_ids = list({t.artist_id for t in tracks})
    album_ids = list({t.album_id for t in tracks if t.album_id is not None})