feat(sources): YouTube Music search + download pipeline (§1C/§1E)
Docker Build & Publish / build (push) Successful in 2m39s
Docker Build & Publish / push (push) Failing after 36s
Docker Build & Publish / Prune old image versions (push) Has been skipped

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
+2
View File
@@ -2,6 +2,7 @@
from app.domain.entities.album import Album
from app.domain.entities.cover import CoverArt
from app.domain.entities.download import DownloadJob
from app.domain.entities.history import PlayHistoryEntry
from app.domain.entities.like import Like
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
@@ -22,6 +23,7 @@ __all__ = [
"CoverArt",
"Credentials",
"DiskUsage",
"DownloadJob",
"Fingerprint",
"FormatBreakdown",
"LibraryStats",
+26
View File
@@ -0,0 +1,26 @@
"""Download job domain entity (plan §6.1).
A queued fetch from an external source, tracked through its lifecycle so the UI
download manager (screen §A5) can show progress, errors, and retries. The
``status`` strings mirror :class:`~app.infrastructure.db.models.enums.DownloadStatus`.
"""
import datetime as dt
import uuid
from dataclasses import dataclass
@dataclass(frozen=True, slots=True)
class DownloadJob:
id: uuid.UUID
source: str
source_id: str | None
query: str | None
requested_by: uuid.UUID | None
status: str
progress: float
error_message: str | None
retry_count: int
track_id: uuid.UUID | None
created_at: dt.datetime
updated_at: dt.datetime
+79 -2
View File
@@ -7,7 +7,7 @@ are bound to these ports at the composition root (``app.api.deps``).
import datetime as dt
import uuid
from collections.abc import AsyncIterator, Iterator
from collections.abc import AsyncIterator, Awaitable, Callable, Iterator
from contextlib import AbstractAsyncContextManager
from pathlib import Path
from typing import Protocol
@@ -18,6 +18,7 @@ from app.domain.entities import (
CoverArt,
Credentials,
DiskUsage,
DownloadJob,
Fingerprint,
LibraryStats,
Like,
@@ -29,9 +30,14 @@ from app.domain.entities import (
User,
)
from app.domain.entities.track import Artist, Track
from app.domain.sources import SourceFile, SourceInfo
from app.domain.sources import DownloadResult, RawMetadata, SearchResult, SourceFile, SourceInfo
from app.domain.tokens import IssuedToken, TokenClaims, TokenType
# Progress hook for fetch sources: the source calls it with the completed
# fraction, a float in [0.0, 1.0], and awaits the result. It is deliberately a
# plain callback rather than a port — an inversion of control that the worker
# supplies per call, persisting the reported progress to the download job.
ProgressCallback = Callable[[float], Awaitable[None]]
class UserRepository(Protocol):
async def get_by_id(self, user_id: uuid.UUID) -> User | None: ...
@@ -275,6 +281,54 @@ class HistoryRepository(Protocol):
async def count(self, *, user_id: uuid.UUID) -> int: ...
class DownloadJobRepository(Protocol):
    """Storage port for download jobs (plan §6.1).

    Backs two consumers: the §A5 download manager and the retry/backoff loop
    of the worker. The protocol only pins down signatures; the notes on the
    individual methods describe what implementations are expected to honour.
    """

    async def add(
        self,
        *,
        source: str,
        source_id: str | None,
        query: str | None,
        requested_by: uuid.UUID | None,
    ) -> DownloadJob:
        ...

    async def get_by_id(self, job_id: uuid.UUID) -> DownloadJob | None:
        ...

    async def get_active_for_source(self, source: str, source_id: str) -> DownloadJob | None:
        """Return a still-unfinished job for the same item, if one exists.

        "Unfinished" means queued, downloading or enriching. Callers use this
        to deduplicate before enqueuing, so a double-click cannot queue the
        same item twice.
        """
        ...

    # NOTE(review): this method name shadows the builtin ``list`` inside the
    # class body. No method declared after it annotates with ``list[...]``
    # today; one that did would pick up this function instead of the builtin.
    async def list(
        self,
        *,
        requested_by: uuid.UUID | None,
        status: str | None,
        limit: int,
        offset: int,
    ) -> list[DownloadJob]:
        ...

    async def count(self, *, requested_by: uuid.UUID | None, status: str | None) -> int:
        ...

    async def set_status(
        self,
        job_id: uuid.UUID,
        *,
        status: str,
        error_message: str | None = None,
        track_id: uuid.UUID | None = None,
    ) -> None:
        ...

    async def set_progress(self, job_id: uuid.UUID, progress: float) -> None:
        ...

    async def increment_retry(self, job_id: uuid.UUID) -> int:
        """Increase ``retry_count`` by one and hand back the updated value."""
        ...

    async def delete(self, job_id: uuid.UUID) -> None:
        ...

    async def failure_rate(self, source: str, *, since: dt.datetime) -> float:
        """Share of ``source`` jobs created since ``since`` that ended ``failed``.

        Yields 0.0 when no such jobs exist. Feeds the §A5 "source unhealthy"
        banner.
        """
        ...
class SourceBackend(Protocol):
"""A registered source of tracks (mounted folder, YouTube, …).
@@ -293,6 +347,29 @@ class IndexableSource(SourceBackend, Protocol):
def scan(self) -> Iterator[SourceFile]: ...
class SearchableSource(SourceBackend, Protocol):
    """Source backend that supports free-text search (YouTube Music, for one).

    ``search`` is expected never to raise: both "no hits" and "the service is
    down" come back as ``[]``, so the discover screen falls back to "nothing
    found" instead of surfacing an error.
    """

    async def search(self, query: str, *, limit: int) -> list[SearchResult]:
        ...
class FetchableSource(SourceBackend, Protocol):
    """Source backend able to download an already-discovered item to local disk.

    ``fetch`` turns a ``source_id`` (taken from a :class:`SearchResult`) into a
    file, reporting progress via ``on_progress`` when one is given. It is meant
    to run only inside a worker, since it is heavy I/O, and it signals failure
    by raising so the download task can retry with backoff.
    """

    async def fetch(
        self,
        source_id: str,
        *,
        on_progress: ProgressCallback | None = None,
    ) -> DownloadResult:
        ...

    async def get_metadata(self, source_id: str) -> RawMetadata | None:
        ...
# -- metadata enrichment (plan §6.2) -----------------------------------------
class AudioTagReader(Protocol):
"""Reads embedded tags from a local audio file. Returns ``None`` only when
+58 -2
View File
@@ -10,8 +10,14 @@ here — a source yields a file plus a minimal title; enrichment (plan §6.2) fi
the rest later, so this stays a thin discovery layer (CLAUDE.md: no duplicated
business logic)."""
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# A source's ``kind`` names which ports it satisfies, so the UI/admin can tell
# an indexed folder from a searchable fetch-source. A single backend may be
# both.
# "indexable": enumerates files already on disk (local folder).
KIND_INDEXABLE = "indexable"
# "fetch": searches + downloads from an external service (YTM, …).
KIND_FETCH = "fetch"
@dataclass(frozen=True, slots=True)
@@ -20,7 +26,7 @@ class SourceInfo:
name: str
label: str
kind: str # "indexable" (more kinds — search/download — arrive with youtube)
kind: str # KIND_INDEXABLE | KIND_FETCH
available: bool
@@ -37,3 +43,53 @@ class SourceFile:
suggested_title: str
file_format: str
file_size: int
@dataclass(frozen=True, slots=True)
class SearchResult:
"""One hit from a searchable source (plan §5), shown on the discover screen.
``source_id`` is the stable handle the same backend later resolves in
``fetch`` — it must round-trip a download request without re-searching.
``raw`` carries the backend's untouched payload for debugging / future use.
"""
source: str
source_id: str
title: str
artist: str | None
album: str | None
duration_seconds: int | None
thumbnail_url: str | None
raw: dict[str, Any] = field(default_factory=dict)
@dataclass(frozen=True, slots=True)
class RawMetadata:
"""Metadata a fetch-source can offer about an item *before* enrichment.
Best-effort and source-shaped — the canonical metadata still comes from the
enrichment pipeline (plan §6.2). Used to seed a more useful provisional
title than a bare id while a download is queued."""
title: str | None
artist: str | None
album: str | None
year: int | None
extra: dict[str, Any] = field(default_factory=dict)
@dataclass(frozen=True, slots=True)
class DownloadResult:
"""A file a fetch-source produced on local disk (plan §5).
``path`` is a temp file the caller owns: it is stored into managed storage
and then removed (same lifecycle as an upload). ``source_id`` is echoed back
because some backends only learn the canonical id during the download."""
source_id: str
path: Path
file_format: str
file_size: int
bitrate: int | None
suggested_title: str