feat(sources): YouTube Music search + download pipeline (§1C/§1E)
Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from app.domain.entities.album import Album
|
||||
from app.domain.entities.cover import CoverArt
|
||||
from app.domain.entities.download import DownloadJob
|
||||
from app.domain.entities.history import PlayHistoryEntry
|
||||
from app.domain.entities.like import Like
|
||||
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
|
||||
@@ -22,6 +23,7 @@ __all__ = [
|
||||
"CoverArt",
|
||||
"Credentials",
|
||||
"DiskUsage",
|
||||
"DownloadJob",
|
||||
"Fingerprint",
|
||||
"FormatBreakdown",
|
||||
"LibraryStats",
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
"""Download job domain entity (plan §6.1).
|
||||
|
||||
A queued fetch from an external source, tracked through its lifecycle so the UI
|
||||
download manager (screen §A5) can show progress, errors, and retries. The
|
||||
``status`` strings mirror :class:`~app.infrastructure.db.models.enums.DownloadStatus`.
|
||||
"""
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DownloadJob:
|
||||
id: uuid.UUID
|
||||
source: str
|
||||
source_id: str | None
|
||||
query: str | None
|
||||
requested_by: uuid.UUID | None
|
||||
status: str
|
||||
progress: float
|
||||
error_message: str | None
|
||||
retry_count: int
|
||||
track_id: uuid.UUID | None
|
||||
created_at: dt.datetime
|
||||
updated_at: dt.datetime
|
||||
+79
-2
@@ -7,7 +7,7 @@ are bound to these ports at the composition root (``app.api.deps``).
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
from collections.abc import AsyncIterator, Iterator
|
||||
from collections.abc import AsyncIterator, Awaitable, Callable, Iterator
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
from pathlib import Path
|
||||
from typing import Protocol
|
||||
@@ -18,6 +18,7 @@ from app.domain.entities import (
|
||||
CoverArt,
|
||||
Credentials,
|
||||
DiskUsage,
|
||||
DownloadJob,
|
||||
Fingerprint,
|
||||
LibraryStats,
|
||||
Like,
|
||||
@@ -29,9 +30,14 @@ from app.domain.entities import (
|
||||
User,
|
||||
)
|
||||
from app.domain.entities.track import Artist, Track
|
||||
from app.domain.sources import SourceFile, SourceInfo
|
||||
from app.domain.sources import DownloadResult, RawMetadata, SearchResult, SourceFile, SourceInfo
|
||||
from app.domain.tokens import IssuedToken, TokenClaims, TokenType
|
||||
|
||||
# A fetch source reports download progress as a fraction in [0.0, 1.0]. It's a
|
||||
# plain callback (not a port) because it's an inversion of control supplied per
|
||||
# call by the worker, which persists it to the download job.
|
||||
ProgressCallback = Callable[[float], Awaitable[None]]
|
||||
|
||||
|
||||
class UserRepository(Protocol):
|
||||
async def get_by_id(self, user_id: uuid.UUID) -> User | None: ...
|
||||
@@ -275,6 +281,54 @@ class HistoryRepository(Protocol):
|
||||
async def count(self, *, user_id: uuid.UUID) -> int: ...
|
||||
|
||||
|
||||
class DownloadJobRepository(Protocol):
    """Storage port for download jobs (plan §6.1).

    Backs both the §A5 download manager and the retry/backoff loop run by the
    worker.
    """

    async def add(
        self,
        *,
        source: str,
        source_id: str | None,
        query: str | None,
        requested_by: uuid.UUID | None,
    ) -> DownloadJob:
        """Record a new job built from the request fields and return it."""
        ...

    async def get_by_id(self, job_id: uuid.UUID) -> DownloadJob | None:
        """Look a job up by its id; the result may be ``None``."""
        ...

    async def get_active_for_source(self, source: str, source_id: str) -> DownloadJob | None:
        """Return a not-yet-finished job (queued/downloading/enriching) for the
        same item, if there is one.

        Callers check this before enqueuing so that a double-click cannot queue
        the same item twice.
        """
        ...

    # NOTE: naming this method ``list`` rebinds ``list`` inside the class body
    # from here on. Its own return annotation still sees the builtin, but any
    # annotation written below it in this class as ``list[...]`` would resolve
    # to this method instead — keep such annotations above this point.
    async def list(
        self,
        *,
        requested_by: uuid.UUID | None,
        status: str | None,
        limit: int,
        offset: int,
    ) -> list[DownloadJob]:
        """Return one page of jobs.

        ``requested_by`` / ``status`` presumably act as optional filters when
        not ``None`` — confirm against the implementation.
        """
        ...

    async def count(self, *, requested_by: uuid.UUID | None, status: str | None) -> int:
        """Return a job count; takes the same two filter arguments as ``list``."""
        ...

    async def set_status(
        self,
        job_id: uuid.UUID,
        *,
        status: str,
        error_message: str | None = None,
        track_id: uuid.UUID | None = None,
    ) -> None:
        """Update a job's ``status``, optionally with an error message and/or
        the id of a track."""
        ...

    async def set_progress(self, job_id: uuid.UUID, progress: float) -> None:
        """Store a new ``progress`` value for the job."""
        ...

    async def increment_retry(self, job_id: uuid.UUID) -> int:
        """Increase ``retry_count`` by one and hand back the updated value."""
        ...

    async def delete(self, job_id: uuid.UUID) -> None:
        """Remove the job with this id."""
        ...

    async def failure_rate(self, source: str, *, since: dt.datetime) -> float:
        """Share of ``source``'s jobs created since ``since`` that ended
        ``failed``; ``0.0`` when there are no such jobs.

        Feeds the §A5 "source unhealthy" banner.
        """
        ...
|
||||
|
||||
|
||||
class SourceBackend(Protocol):
|
||||
"""A registered source of tracks (mounted folder, YouTube, …).
|
||||
|
||||
@@ -293,6 +347,29 @@ class IndexableSource(SourceBackend, Protocol):
|
||||
def scan(self) -> Iterator[SourceFile]: ...
|
||||
|
||||
|
||||
class SearchableSource(SourceBackend, Protocol):
    """A source that supports free-text search (e.g. YouTube Music).

    ``search`` must not raise: when nothing matches, or when the backing
    service is down, it returns ``[]`` so the discover screen shows "nothing
    found" instead of an error.
    """

    async def search(self, query: str, *, limit: int) -> list[SearchResult]:
        """Return the hits for ``query``; ``limit`` is presumably the maximum
        number of results — confirm against the implementation."""
        ...
|
||||
|
||||
|
||||
class FetchableSource(SourceBackend, Protocol):
    """A source able to download an already-discovered item to local disk.

    ``fetch`` turns a ``source_id`` (taken from a :class:`SearchResult`) into a
    file, reporting progress via ``on_progress``. Because it is heavy I/O it
    only runs inside a worker, and it raises on failure so that the download
    task can retry with backoff.
    """

    async def fetch(
        self,
        source_id: str,
        *,
        on_progress: ProgressCallback | None = None,
    ) -> DownloadResult:
        """Download the item identified by ``source_id``.

        ``on_progress`` is optional; when given it receives progress updates.
        Failures are signalled by raising.
        """
        ...

    async def get_metadata(self, source_id: str) -> RawMetadata | None:
        """Return the source's own metadata for ``source_id``; the result may
        be ``None``."""
        ...
|
||||
|
||||
|
||||
# -- metadata enrichment (plan §6.2) -----------------------------------------
|
||||
class AudioTagReader(Protocol):
|
||||
"""Reads embedded tags from a local audio file. Returns ``None`` only when
|
||||
|
||||
+58
-2
@@ -10,8 +10,14 @@ here — a source yields a file plus a minimal title; enrichment (plan §6.2) fi
|
||||
the rest later, so this stays a thin discovery layer (CLAUDE.md: no duplicated
|
||||
business logic)."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# A source's ``kind`` describes which ports it satisfies, so the UI/admin can
|
||||
# tell an indexed folder from a searchable fetch-source. A backend may be both.
|
||||
KIND_INDEXABLE = "indexable" # enumerates files already on disk (local folder)
|
||||
KIND_FETCH = "fetch" # searches + downloads from an external service (YTM, …)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
@@ -20,7 +26,7 @@ class SourceInfo:
|
||||
|
||||
name: str
|
||||
label: str
|
||||
kind: str # "indexable" (more kinds — search/download — arrive with youtube)
|
||||
kind: str # KIND_INDEXABLE | KIND_FETCH
|
||||
available: bool
|
||||
|
||||
|
||||
@@ -37,3 +43,53 @@ class SourceFile:
|
||||
suggested_title: str
|
||||
file_format: str
|
||||
file_size: int
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class SearchResult:
|
||||
"""One hit from a searchable source (plan §5), shown on the discover screen.
|
||||
|
||||
``source_id`` is the stable handle the same backend later resolves in
|
||||
``fetch`` — it must round-trip a download request without re-searching.
|
||||
``raw`` carries the backend's untouched payload for debugging / future use.
|
||||
"""
|
||||
|
||||
source: str
|
||||
source_id: str
|
||||
title: str
|
||||
artist: str | None
|
||||
album: str | None
|
||||
duration_seconds: int | None
|
||||
thumbnail_url: str | None
|
||||
raw: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RawMetadata:
|
||||
"""Metadata a fetch-source can offer about an item *before* enrichment.
|
||||
|
||||
Best-effort and source-shaped — the canonical metadata still comes from the
|
||||
enrichment pipeline (plan §6.2). Used to seed a more useful provisional
|
||||
title than a bare id while a download is queued."""
|
||||
|
||||
title: str | None
|
||||
artist: str | None
|
||||
album: str | None
|
||||
year: int | None
|
||||
extra: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DownloadResult:
|
||||
"""A file a fetch-source produced on local disk (plan §5).
|
||||
|
||||
``path`` is a temp file the caller owns: it is stored into managed storage
|
||||
and then removed (same lifecycle as an upload). ``source_id`` is echoed back
|
||||
because some backends only learn the canonical id during the download."""
|
||||
|
||||
source_id: str
|
||||
path: Path
|
||||
file_format: str
|
||||
file_size: int
|
||||
bitrate: int | None
|
||||
suggested_title: str
|
||||
|
||||
Reference in New Issue
Block a user