"""``youtube`` source — YouTube Music search + download (plan §5). A *fetch* source: it searches YouTube Music (via ``ytmusicapi``, which returns clean song/artist/album/duration rows) and downloads the chosen item with ``yt-dlp``. The two libraries are synchronous, so every call is bounced to a worker thread (``anyio.to_thread``); the sync yt-dlp progress hook bridges back to the async progress callback via ``anyio.from_thread``. Both libraries are optional dependencies — if either is missing the source is simply *unavailable* (it never crashes import or the registry; graceful degradation per CLAUDE.md). The audio stream is stored **as-is** (YouTube serves lossy Opus/AAC; re-encoding would be lossy→lossy, plan §6.6). ``source_id`` is the YouTube ``videoId`` — stable, so a re-download of the same id is idempotent and dedups against an existing track. """ import functools import tempfile from collections.abc import Callable from pathlib import Path from typing import Any import anyio from app.core.logging import get_logger from app.domain.ports import ProgressCallback from app.domain.sources import ( KIND_FETCH, DownloadResult, RawMetadata, SearchResult, SourceInfo, ) from app.infrastructure.db.models.enums import TrackSource log = get_logger(__name__) # Functions a caller may inject for testing (defaults do the real library work). 
SearchFn = Callable[[str, int], list[dict[str, Any]]]
# (video_id, tmp_dir, progress_hook, cookies_path) -> normalized download dict
DownloadFn = Callable[[str, Path, Callable[[dict[str, Any]], None], Path | None], dict[str, Any]]


def _libs_available() -> bool:
    """Return True when both optional libraries (``yt_dlp``, ``ytmusicapi``) import."""
    try:
        import yt_dlp  # noqa: F401
        import ytmusicapi  # noqa: F401
    except ImportError:
        return False
    return True


def _watch_url(video_id: str) -> str:
    """Build the YouTube Music watch URL for *video_id*.

    The id is percent-encoded before being placed in the query string, so a
    value containing ``&``, ``#`` or ``=`` cannot add or override URL
    parameters. Ids made only of letters, digits, ``-`` and ``_`` are emitted
    unchanged.
    """
    from urllib.parse import quote

    return f"https://music.youtube.com/watch?v={quote(video_id, safe='')}"


class YouTubeMusicSource:
    """Implements :class:`app.domain.ports.SearchableSource` and
    :class:`~app.domain.ports.FetchableSource`.

    The search and download callables can be injected (keyword-only); when
    omitted, the module-level defaults backed by ``ytmusicapi`` and ``yt-dlp``
    are used.
    """

    name = TrackSource.YOUTUBE.value

    def __init__(
        self,
        *,
        cookies_path: Path | None = None,
        tmp_dir: Path | None = None,
        search_fn: SearchFn | None = None,
        download_fn: DownloadFn | None = None,
    ) -> None:
        """Store the configuration and pick the search/download callables.

        Args:
            cookies_path: Optional cookies file forwarded to the download
                callable.
            tmp_dir: Directory downloads are written to; the system temp
                directory is used when omitted.
            search_fn: Replacement for the default search callable.
            download_fn: Replacement for the default download callable.
        """
        self._cookies_path = cookies_path
        self._tmp_dir = tmp_dir
        self._search_fn = search_fn or _default_search
        self._download_fn = download_fn or _default_download
        # Only the real library path needs the deps; an injected fn is self-contained.
        # NOTE(review): injecting just one of the two callables still marks the
        # source as available even if the library behind the other one is
        # missing — confirm this is intended.
        self._injected = search_fn is not None or download_fn is not None

    def info(self) -> SourceInfo:
        """Describe this source (name, label, kind, current availability)."""
        return SourceInfo(
            name=self.name,
            label="YouTube Music",
            kind=KIND_FETCH,
            available=self.is_available(),
        )

    def is_available(self) -> bool:
        """Return True when a callable was injected or both libraries import."""
        return True if self._injected else _libs_available()

    async def search(self, query: str, *, limit: int) -> list[SearchResult]:
        """Search for *query* and return at most what the search callable yields.

        The search callable runs in a worker thread. A blank query or a
        non-positive *limit* returns ``[]`` without calling it. Any exception
        raised by the callable is logged and turned into ``[]``; rows that
        cannot be mapped to a result are skipped.
        """
        query = query.strip()
        # A non-positive limit can never yield rows; a negative one would also
        # make the default search slice rows off the end of the result list.
        if not query or limit <= 0:
            return []
        try:
            rows = await anyio.to_thread.run_sync(functools.partial(self._search_fn, query, limit))
        except Exception as exc:
            # No results / service down → degrade to empty (plan §5, CLAUDE.md).
            # The error is attached so the failure can still be diagnosed.
            log.warning("ytm_search_failed", query=query, error=repr(exc))
            return []
        # ``rows or []`` keeps a callable that returned None from raising here.
        converted = (self._to_result(row) for row in rows or [])
        return [result for result in converted if result is not None]

    async def fetch(
        self, source_id: str, *, on_progress: ProgressCallback | None = None
    ) -> DownloadResult:
        """Download *source_id* and describe the resulting file.

        The download callable runs in a worker thread. While it reports
        ``"downloading"`` events with known byte counts, *on_progress* is
        called with a fraction capped at 0.99. Exceptions raised by the
        download callable propagate to the caller.
        """
        tmp_dir = self._tmp_dir or Path(tempfile.gettempdir())

        def hook(d: dict[str, Any]) -> None:
            if on_progress is None or d.get("status") != "downloading":
                return
            total = d.get("total_bytes") or d.get("total_bytes_estimate")
            done = d.get("downloaded_bytes")
            if not total or done is None:
                return
            # Cap below 1.0 — the job only reaches 1.0 once stored + imported.
            frac = min(done / total, 0.99)
            # Bridge sync hook (worker thread) → async callback (event loop).
            anyio.from_thread.run(on_progress, frac)

        def _run() -> dict[str, Any]:
            return self._download_fn(source_id, tmp_dir, hook, self._cookies_path)

        info = await anyio.to_thread.run_sync(_run)
        path = Path(info["filepath"])
        stat = await anyio.Path(path).stat()
        return DownloadResult(
            source_id=source_id,
            path=path,
            file_format=info["file_format"],
            file_size=stat.st_size,
            bitrate=info.get("bitrate"),
            suggested_title=info.get("title") or source_id,
        )

    async def get_metadata(self, source_id: str) -> RawMetadata | None:
        """Return ``None``: no dedicated metadata lookup is implemented."""
        # The search result already carries a usable title/artist, and the
        # canonical metadata comes from enrichment (§6.2). A dedicated lookup is
        # an optional refinement — skipped for now (returns None gracefully).
        return None

    def _to_result(self, row: dict[str, Any]) -> SearchResult | None:
        """Map one raw search row to a :class:`SearchResult`.

        Returns ``None`` when the row is not a dict or has no ``videoId``.
        Malformed ``artists`` / ``album`` / ``thumbnails`` values are ignored
        rather than raised on, because this runs outside the error handling in
        :meth:`search` and one bad row must not fail the whole search.
        """
        if not isinstance(row, dict):
            return None
        video_id = row.get("videoId")
        if not video_id:
            return None  # non-playable row (e.g. a video without audio id)

        artists = row.get("artists")
        if not isinstance(artists, (list, tuple)):
            artists = []
        artist = (
            ", ".join(a["name"] for a in artists if isinstance(a, dict) and a.get("name"))
            or None
        )

        album_info = row.get("album")
        album = album_info.get("name") if isinstance(album_info, dict) else None

        # The last thumbnail entry is the one used, as in the upstream row order.
        thumbnails = row.get("thumbnails")
        thumbnail = None
        if isinstance(thumbnails, (list, tuple)) and thumbnails:
            last = thumbnails[-1]
            if isinstance(last, dict):
                thumbnail = last.get("url")

        return SearchResult(
            source=self.name,
            source_id=str(video_id),
            title=row.get("title") or "Unknown",
            artist=artist,
            album=album,
            duration_seconds=row.get("duration_seconds"),
            thumbnail_url=thumbnail,
            raw=row,
        )


def _default_search(query: str, limit: int) -> list[dict[str, Any]]:
    """Real ytmusicapi search (songs only). Runs in a worker thread."""
    from ytmusicapi import YTMusic

    yt = YTMusic()  # unauthenticated: public search needs no login
    results: list[dict[str, Any]] = yt.search(query, filter="songs", limit=limit)
    return results[:limit]


def _default_download(
    video_id: str,
    tmp_dir: Path,
    progress_hook: Callable[[dict[str, Any]], None],
    cookies_path: Path | None,
) -> dict[str, Any]:
    """Real yt-dlp download of the best audio stream. Runs in a worker thread.

    Stores the original stream (no transcode — plan §6.3/§6.6). Returns a
    normalized dict the adapter maps to :class:`DownloadResult`, with the keys
    ``filepath``, ``file_format``, ``bitrate`` and ``title``.
    """
    from yt_dlp import YoutubeDL

    opts: dict[str, Any] = {
        "format": "bestaudio/best",
        "outtmpl": str(tmp_dir / "%(id)s.%(ext)s"),
        "quiet": True,
        "no_warnings": True,
        "noprogress": True,
        "progress_hooks": [progress_hook],
    }
    # Use cookies only when the file is actually present: the path can be set
    # unconditionally (e.g. a mounted volume that may be empty) and downloads
    # still work without it — cookies just unlock age/region-restricted items.
    if cookies_path is not None and cookies_path.is_file():
        opts["cookiefile"] = str(cookies_path)

    with YoutubeDL(opts) as ydl:
        info = ydl.extract_info(_watch_url(video_id), download=True)
        filepath = Path(ydl.prepare_filename(info))

    abr = info.get("abr")
    return {
        "filepath": filepath,
        # Fall back to "m4a" when the produced file name has no extension.
        "file_format": filepath.suffix.lstrip(".").lower() or "m4a",
        "bitrate": int(abr) if abr else None,
        "title": info.get("title"),
    }