feat(sources): YouTube Music search + download pipeline (§1C/§1E)
Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
"""DownloadService — request external downloads and import their results.
|
||||
|
||||
Two roles (plan §6.1):
|
||||
|
||||
* **Request side** (HTTP): validate + dedup a download request, create a
|
||||
``queued`` job, and enqueue the worker. Dedup is on ``(source, source_id)``
|
||||
against both the library (already imported) and in-flight jobs (a double-click
|
||||
must not queue twice) — idempotency per CLAUDE.md.
|
||||
* **Worker side**: ``store_result`` turns a backend's :class:`DownloadResult`
|
||||
into a managed file + minimal ``pending`` track (sibling of
|
||||
:class:`~app.application.import_service.LibraryImportService`); enrichment
|
||||
(§6.2) fills the rest.
|
||||
|
||||
The fingerprint-level dedup (a different id that turns out to be the same audio)
|
||||
happens later in enrichment, where the fingerprint is computed.
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
import uuid
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
import anyio
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.domain.entities.download import DownloadJob
|
||||
from app.domain.errors import NotFoundError, ValidationError
|
||||
from app.domain.ports import (
|
||||
ArtistRepository,
|
||||
DownloadJobRepository,
|
||||
FileStorage,
|
||||
TrackRepository,
|
||||
)
|
||||
from app.domain.sources import DownloadResult
|
||||
|
||||
log = get_logger(__name__)

# Artist name handed to ``ArtistRepository.get_or_create`` when a freshly
# downloaded track is stored, before any real artist metadata is known.
_UNKNOWN_ARTIST = "Unknown Artist"

# (job_id) -> None — enqueue the download worker, deferred so the job row is
# committed before the worker reads it (same pattern as enrich).
DownloadEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
# Same call shape for the enrichment worker.
# NOTE(review): the UUID is presumably the track id — confirm against the wiring.
EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DownloadRequest:
    """Outcome of asking for a download.

    Exactly one of three states holds:

    * the item is already in the library — ``already_in_library`` is true,
      ``track_id`` is set and ``job`` is ``None``;
    * an in-flight job already covers it — ``job`` is that existing job;
    * a new job was just created — ``job`` is the new job.

    In both job states ``track_id`` is ``None`` and ``already_in_library`` is
    false, so the UI can route to the download manager.
    """

    # Existing or newly created job; ``None`` when the item is already in the library.
    job: DownloadJob | None
    # Id of the library track that already covers the request; ``None`` otherwise.
    track_id: uuid.UUID | None
    # True only when the request was satisfied by an existing library track.
    already_in_library: bool
|
||||
|
||||
|
||||
class DownloadService:
    """Request external downloads and import their results.

    The request-side methods (``request``, ``list``, ``get``, ``cancel``,
    ``retry``) manage download job rows and enqueue the download worker.
    The worker-side method (``store_result``) turns a finished download into
    a managed file plus a minimal ``pending`` track.
    """

    def __init__(
        self,
        *,
        jobs: DownloadJobRepository,
        tracks: TrackRepository,
        artists: ArtistRepository,
        storage: FileStorage,
        enqueue_download: DownloadEnqueuer | None = None,
        enqueue_enrich: EnrichEnqueuer | None = None,
    ) -> None:
        """Wire the service to its repositories, storage and optional enqueuers.

        Both enqueuers are optional; when ``enqueue_download`` is ``None`` the
        service only writes job rows and never schedules the worker.
        """
        self._jobs = jobs
        self._tracks = tracks
        self._artists = artists
        self._storage = storage
        self._enqueue_download = enqueue_download
        # Stored for wiring symmetry; no method of this class calls it
        # (``store_result`` leaves enqueueing enrichment to its caller).
        self._enqueue_enrich = enqueue_enrich

    # -- request side ---------------------------------------------------------

    async def request(
        self,
        *,
        source: str,
        source_id: str,
        query: str | None,
        requested_by: uuid.UUID | None,
    ) -> DownloadRequest:
        """Validate and deduplicate a download request, queueing a job if needed.

        Dedup is on ``(source, source_id)``: first against tracks already in
        the library, then against active jobs. Only when neither matches is a
        new job added and (if an enqueuer is configured) the worker enqueued.

        Raises:
            ValidationError: if ``source_id`` is empty after stripping whitespace.
        """
        source_id = source_id.strip()
        if not source_id:
            raise ValidationError("A source_id is required to download.")

        existing = await self._tracks.get_by_source(source, source_id)
        if existing is not None:
            return DownloadRequest(job=None, track_id=existing.id, already_in_library=True)

        # NOTE(review): check-then-insert is not atomic here; two concurrent
        # identical requests rely on the repository/DB to reject the duplicate
        # — confirm a uniqueness guard exists there.
        active = await self._jobs.get_active_for_source(source, source_id)
        if active is not None:
            return DownloadRequest(job=active, track_id=None, already_in_library=False)

        job = await self._jobs.add(
            source=source,
            source_id=source_id,
            query=query,
            requested_by=requested_by,
        )
        if self._enqueue_download is not None:
            await self._enqueue_download(job.id)
        return DownloadRequest(job=job, track_id=None, already_in_library=False)

    # NOTE: this method shadows the builtin ``list`` inside the class body, so
    # methods defined after it must not use ``list[...]`` in eagerly evaluated
    # annotations.
    async def list(
        self,
        *,
        requested_by: uuid.UUID | None,
        status: str | None,
        limit: int,
        offset: int,
    ) -> tuple[list[DownloadJob], int]:
        """Return one page of jobs plus the total count for the same filters."""
        jobs = await self._jobs.list(
            requested_by=requested_by, status=status, limit=limit, offset=offset
        )
        total = await self._jobs.count(requested_by=requested_by, status=status)
        return jobs, total

    async def get(self, job_id: uuid.UUID) -> DownloadJob:
        """Return the job with ``job_id``.

        Raises:
            NotFoundError: if no such job exists.
        """
        job = await self._jobs.get_by_id(job_id)
        if job is None:
            raise NotFoundError(f"Download job {job_id} not found.")
        return job

    async def cancel(self, job_id: uuid.UUID) -> None:
        """Remove the job record. True mid-flight cancellation of an in-progress
        yt-dlp download is out of scope (MVP); the worker tolerates a vanished
        job row (its status writes become no-ops).

        Raises:
            NotFoundError: if no such job exists.
        """
        # Reuse ``get`` so the existence check and error message live in one place.
        await self.get(job_id)
        await self._jobs.delete(job_id)

    async def retry(self, job_id: uuid.UUID) -> DownloadJob:
        """Reset the job to ``queued``, clear its error, and re-enqueue the worker.

        Returns the job as re-read after the status change, falling back to
        the pre-change snapshot if the row disappeared in between.

        Raises:
            NotFoundError: if no such job exists.
        """
        job = await self.get(job_id)
        await self._jobs.set_status(job_id, status="queued", error_message=None)
        if self._enqueue_download is not None:
            await self._enqueue_download(job_id)
        refreshed = await self._jobs.get_by_id(job_id)
        return refreshed if refreshed is not None else job

    # -- worker side ----------------------------------------------------------

    async def store_result(
        self,
        *,
        source: str,
        result: DownloadResult,
        requested_by: uuid.UUID | None,
    ) -> uuid.UUID:
        """Store a freshly downloaded file and create a minimal ``pending`` track.

        Returns the new track id (the caller enqueues enrichment after commit).
        The temp file produced by the backend is always removed (best effort).

        If storing the file or creating the track fails — including when the
        task is cancelled — the object written under the new storage key is
        deleted (best effort) and the original error is re-raised, so no
        managed file is left without a track row.
        """
        track_id = uuid.uuid4()
        key = f"tracks/{str(track_id)[:2]}/{track_id}.{result.file_format}"
        try:
            try:
                # ``save_file`` sits inside the rollback region so a failed or
                # partial write is cleaned up too; the key is freshly
                # generated, so the delete cannot hit another track's file.
                await self._storage.save_file(key, result.path)
                artist = await self._artists.get_or_create(_UNKNOWN_ARTIST)
                await self._tracks.add(
                    id=track_id,
                    title=result.suggested_title,
                    artist_id=artist.id,
                    storage_uri=key,
                    file_format=result.file_format,
                    file_size=result.file_size,
                    source=source,
                    source_id=result.source_id,
                    metadata_status="pending",
                    added_by=requested_by,
                )
            except BaseException:
                # BaseException (not Exception) so that task cancellation
                # (asyncio.CancelledError) also rolls the stored file back.
                with contextlib.suppress(Exception):
                    await self._storage.delete(key)
                raise
        finally:
            with contextlib.suppress(Exception):
                await anyio.Path(result.path).unlink(missing_ok=True)
        return track_id
|
||||
Reference in New Issue
Block a user