feat(sources): YouTube Music search + download pipeline (§1C/§1E)
Docker Build & Publish / build (push) Successful in 2m39s
Docker Build & Publish / push (push) Failing after 36s
Docker Build & Publish / Prune old image versions (push) Has been skipped

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
+183
View File
@@ -0,0 +1,183 @@
"""DownloadService — request external downloads and import their results.
Two roles (plan §6.1):
* **Request side** (HTTP): validate + dedup a download request, create a
``queued`` job, and enqueue the worker. Dedup is on ``(source, source_id)``
against both the library (already imported) and in-flight jobs (a double-click
must not queue twice) — idempotency per CLAUDE.md.
* **Worker side**: ``store_result`` turns a backend's :class:`DownloadResult`
into a managed file + minimal ``pending`` track (sibling of
:class:`~app.application.import_service.LibraryImportService`); enrichment
(§6.2) fills the rest.
The fingerprint-level dedup (a different id that turns out to be the same audio)
happens later in enrichment, where the fingerprint is computed.
"""
import contextlib
import uuid
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
import anyio
from app.core.logging import get_logger
from app.domain.entities.download import DownloadJob
from app.domain.errors import NotFoundError, ValidationError
from app.domain.ports import (
ArtistRepository,
DownloadJobRepository,
FileStorage,
TrackRepository,
)
from app.domain.sources import DownloadResult
# Logger for this module, named after ``__name__``.
log = get_logger(__name__)

# Artist name assigned to freshly downloaded tracks by
# ``DownloadService.store_result`` (looked up or created via the artist repo).
_UNKNOWN_ARTIST = "Unknown Artist"

# (job_id) -> None — enqueue the download worker, deferred so the job row is
# committed before the worker reads it (same pattern as enrich).
DownloadEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
# (uuid) -> None — enqueue the enrichment worker.
# NOTE(review): presumably the argument is the track id returned by
# ``store_result``; confirm at the place where the enqueuer is wired in.
EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
@dataclass(frozen=True)
class DownloadRequest:
    """Outcome of asking for a download.

    Three outcomes are possible:

    * the item is already in the library — ``track_id`` is set,
      ``already_in_library`` is true and ``job`` is ``None``;
    * an in-flight job already covers it — ``job`` is that existing job;
    * a new job was just created — ``job`` is the new job.

    The last two have the same shape (``job`` set, ``track_id`` ``None``,
    ``already_in_library`` false), so the UI can route to the download manager
    in either case.
    """

    # Existing or newly created job; ``None`` when the item is already in the library.
    job: DownloadJob | None
    # Id of the library track that already matches; ``None`` when a job is returned.
    track_id: uuid.UUID | None
    # True only for the "already in the library" outcome.
    already_in_library: bool
class DownloadService:
    """Request external downloads and import their results.

    The request-side methods (``request``, ``list``, ``get``, ``cancel``,
    ``retry``) manage download job records. The worker-side ``store_result``
    turns a finished download into a stored file plus a ``pending`` track.
    """

    def __init__(
        self,
        *,
        jobs: DownloadJobRepository,
        tracks: TrackRepository,
        artists: ArtistRepository,
        storage: FileStorage,
        enqueue_download: DownloadEnqueuer | None = None,
        enqueue_enrich: EnrichEnqueuer | None = None,
    ) -> None:
        """Wire the service to its repositories, storage and enqueuers.

        Args:
            jobs: Repository of download job records.
            tracks: Track repository (dedup lookup and track creation).
            artists: Artist repository (placeholder artist lookup/creation).
            storage: Managed file storage for downloaded audio.
            enqueue_download: Optional callback that schedules the download
                worker for a job id. When ``None``, jobs are recorded but
                nothing is enqueued.
            enqueue_enrich: Optional enrichment callback. It is stored but not
                called by any method of this class.
        """
        self._jobs = jobs
        self._tracks = tracks
        self._artists = artists
        self._storage = storage
        self._enqueue_download = enqueue_download
        self._enqueue_enrich = enqueue_enrich

    # -- request side ---------------------------------------------------------

    async def request(
        self,
        *,
        source: str,
        source_id: str,
        query: str | None,
        requested_by: uuid.UUID | None,
    ) -> DownloadRequest:
        """Ask for ``(source, source_id)`` to be downloaded without duplicating work.

        Checks are made in this order: a library track with the same source
        key, then an active job for the same source key. A new job is created
        and enqueued only if neither exists.

        Args:
            source: Name of the fetch source.
            source_id: Source-specific item id; surrounding whitespace is stripped.
            query: Optional query text recorded on the job.
            requested_by: Optional id of the requesting user.

        Returns:
            A :class:`DownloadRequest` describing which of the outcomes applied.

        Raises:
            ValidationError: If ``source_id`` is empty or whitespace-only.
        """
        source_id = source_id.strip()
        if not source_id:
            raise ValidationError("A source_id is required to download.")

        # Already imported: point the caller at the existing track.
        existing = await self._tracks.get_by_source(source, source_id)
        if existing is not None:
            return DownloadRequest(job=None, track_id=existing.id, already_in_library=True)

        # Already being handled: hand back the in-flight job instead of queuing twice.
        active = await self._jobs.get_active_for_source(source, source_id)
        if active is not None:
            return DownloadRequest(job=active, track_id=None, already_in_library=False)

        job = await self._jobs.add(
            source=source,
            source_id=source_id,
            query=query,
            requested_by=requested_by,
        )
        if self._enqueue_download is not None:
            await self._enqueue_download(job.id)
        return DownloadRequest(job=job, track_id=None, already_in_library=False)

    # NOTE: this method's name shadows the builtin ``list`` for the rest of the
    # class body. Annotations on methods defined *below* it must not use
    # ``list[...]`` unless annotations are evaluated lazily.
    async def list(
        self,
        *,
        requested_by: uuid.UUID | None,
        status: str | None,
        limit: int,
        offset: int,
    ) -> tuple[list[DownloadJob], int]:
        """Return one page of jobs plus the total count for the same filter.

        Args:
            requested_by: Filter passed through to the repository.
            status: Filter passed through to the repository.
            limit: Page size passed to the repository.
            offset: Page offset passed to the repository.

        Returns:
            ``(jobs, total)`` where ``total`` ignores ``limit``/``offset``.
        """
        jobs = await self._jobs.list(
            requested_by=requested_by, status=status, limit=limit, offset=offset
        )
        total = await self._jobs.count(requested_by=requested_by, status=status)
        return jobs, total

    async def get(self, job_id: uuid.UUID) -> DownloadJob:
        """Fetch a job by id.

        Raises:
            NotFoundError: If no job with ``job_id`` exists.
        """
        job = await self._jobs.get_by_id(job_id)
        if job is None:
            raise NotFoundError(f"Download job {job_id} not found.")
        return job

    async def cancel(self, job_id: uuid.UUID) -> None:
        """Remove the job record.

        True mid-flight cancellation of an in-progress yt-dlp download is out
        of scope (MVP); the worker tolerates a vanished job row (its status
        writes become no-ops).

        Raises:
            NotFoundError: If no job with ``job_id`` exists.
        """
        # Reuse ``get`` for the existence check so the not-found error is
        # produced in exactly one place.
        await self.get(job_id)
        await self._jobs.delete(job_id)

    async def retry(self, job_id: uuid.UUID) -> DownloadJob:
        """Reset a job to ``queued``, clear its error, and enqueue it again.

        Returns:
            The job as re-read after the status update, or the copy read
            before the update if the row has disappeared in the meantime.

        Raises:
            NotFoundError: If no job with ``job_id`` exists.
        """
        job = await self.get(job_id)
        # NOTE(review): the job's current state is not inspected, so retrying a
        # job that is still queued or running would enqueue it a second time.
        # Confirm whether callers guard against that.
        await self._jobs.set_status(job_id, status="queued", error_message=None)
        if self._enqueue_download is not None:
            await self._enqueue_download(job_id)
        refreshed = await self._jobs.get_by_id(job_id)
        return refreshed if refreshed is not None else job

    # -- worker side ----------------------------------------------------------

    async def store_result(
        self,
        *,
        source: str,
        result: DownloadResult,
        requested_by: uuid.UUID | None,
    ) -> uuid.UUID:
        """Store a freshly downloaded file and create a minimal ``pending`` track.

        The file at ``result.path`` is saved to managed storage under a new
        key, then a track row is created with the placeholder artist. If
        creating the row fails for any reason (including task cancellation),
        the stored object is deleted again on a best-effort basis before the
        error propagates. Removal of the backend's temp file is attempted in
        every case; a failure to remove it is swallowed.

        Args:
            source: Name of the fetch source the file came from.
            result: The backend's download result (temp path, format, size,
                source id, suggested title).
            requested_by: Optional id of the requesting user, recorded as
                ``added_by``.

        Returns:
            The new track id (the caller enqueues enrichment after commit).
        """
        track_id = uuid.uuid4()
        # Storage key is sharded by the first two characters of the track id.
        key = f"tracks/{str(track_id)[:2]}/{track_id}.{result.file_format}"
        try:
            await self._storage.save_file(key, result.path)
            try:
                artist = await self._artists.get_or_create(_UNKNOWN_ARTIST)
                await self._tracks.add(
                    id=track_id,
                    title=result.suggested_title,
                    artist_id=artist.id,
                    storage_uri=key,
                    file_format=result.file_format,
                    file_size=result.file_size,
                    source=source,
                    source_id=result.source_id,
                    metadata_status="pending",
                    added_by=requested_by,
                )
            except BaseException:
                # BaseException rather than Exception: asyncio.CancelledError
                # is not an Exception subclass, so a cancelled worker would
                # otherwise skip this rollback and leave a stored object with
                # no track row. The original error is always re-raised.
                with contextlib.suppress(Exception):
                    await self._storage.delete(key)
                raise
        finally:
            # Best-effort temp-file cleanup; never mask the real outcome.
            with contextlib.suppress(Exception):
                await anyio.Path(result.path).unlink(missing_ok=True)
        return track_id