feat(sources): YouTube Music search + download pipeline (§1C/§1E)

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
@@ -0,0 +1,252 @@
+"""Integration tests for downloads + external search.
+
+Requires a reachable Postgres; skips otherwise. The download worker task is
+invoked directly (no Redis needed) against a fake fetch source, so the full
+DB + storage import path is covered without touching the network.
+"""
+
+import asyncio
+import os
+from collections.abc import AsyncIterator
+from pathlib import Path
+from typing import Any
+
+import pytest
+from app.core.config import get_settings
+from app.domain.sources import KIND_FETCH, DownloadResult, SearchResult, SourceInfo
+from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope
+from app.infrastructure.db.repositories import (
+    SqlAlchemyRefreshTokenRepository,
+    SqlAlchemyUserRepository,
+)
+from app.infrastructure.sources.registry import SourceRegistry
+from asgi_lifespan import LifespanManager
+from httpx import ASGITransport, AsyncClient
+
+pytestmark = pytest.mark.asyncio
+
+_db_reachable_cache: bool | None = None
+
+
+async def _db_reachable() -> bool:
+    global _db_reachable_cache
+    if _db_reachable_cache is not None:
+        return _db_reachable_cache
+    from sqlalchemy import text
+
+    try:
+        async with asyncio.timeout(3):
+            async with get_engine().connect() as conn:
+                await conn.execute(text("SELECT 1"))
+        _db_reachable_cache = True
+    except Exception:
+        _db_reachable_cache = False
+    return _db_reachable_cache
+
+
+class FakeFetchSource:
+    """A searchable + fetchable source that writes a local file (no network)."""
+
+    name = "youtube"
+
+    def __init__(self, tmp_dir: Path) -> None:
+        self._tmp_dir = tmp_dir
+
+    def info(self) -> SourceInfo:
+        return SourceInfo(name=self.name, label="YouTube Music", kind=KIND_FETCH, available=True)
+
+    def is_available(self) -> bool:
+        return True
+
+    async def search(self, query: str, *, limit: int) -> list[SearchResult]:
+        return [
+            SearchResult(
+                source=self.name,
+                source_id="vid-1",
+                title=f"{query} song",
+                artist="Some Artist",
+                album="Some Album",
+                duration_seconds=200,
+                thumbnail_url="http://img/large.jpg",
+            )
+        ]
+
+    async def fetch(self, source_id: str, *, on_progress: Any = None) -> DownloadResult:
+        path = self._tmp_dir / f"{source_id}.m4a"
+        path.write_bytes(b"downloaded audio bytes" * 8)
+        if on_progress is not None:
+            await on_progress(0.5)
+        return DownloadResult(
+            source_id=source_id,
+            path=path,
+            file_format="webm",
+            file_size=path.stat().st_size,
+            bitrate=160,
+            suggested_title=f"Title for {source_id}",
+        )
+
+    async def get_metadata(self, source_id: str) -> None:
+        return None
+
+
+@pytest.fixture
+async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]:
+    if not await _db_reachable():
+        pytest.skip("Postgres not reachable — integration test skipped.")
+
+    media = tmp_path / "media"
+    media.mkdir()
+    os.environ["MEDIA_PATH"] = str(media)
+    get_settings.cache_clear()
+
+    import app.infrastructure.storage.provider as _storage_provider
+
+    _storage_provider._storage = None
+
+    try:
+        async with get_engine().begin() as conn:
+            await conn.run_sync(Base.metadata.drop_all)
+            await conn.run_sync(Base.metadata.create_all)
+
+        from app.application.user_service import UserService
+        from app.core.security import Argon2PasswordHasher
+
+        async with session_scope() as session:
+            await UserService(
+                users=SqlAlchemyUserRepository(session),
+                refresh_tokens=SqlAlchemyRefreshTokenRepository(session),
+                hasher=Argon2PasswordHasher(),
+            ).create_user(username="admin", password="adminpass1", is_superuser=True)
+
+        from app.api.deps import get_source_registry
+        from app.main import create_app
+
+        app = create_app()
+        # Inject a fake fetch source so search/download never hit the network.
+        fake_registry = SourceRegistry([FakeFetchSource(tmp_path / "dl")])  # type: ignore[list-item]
+        (tmp_path / "dl").mkdir()
+        app.dependency_overrides[get_source_registry] = lambda: fake_registry
+
+        async with LifespanManager(app):
+            transport = ASGITransport(app=app)
+            async with AsyncClient(transport=transport, base_url="http://test") as client:
+                yield client
+
+        async with get_engine().begin() as conn:
+            await conn.run_sync(Base.metadata.drop_all)
+        await dispose_engine()
+    finally:
+        _storage_provider._storage = None
+        os.environ.pop("MEDIA_PATH", None)
+        get_settings.cache_clear()
+
+
+async def _login(api: AsyncClient) -> str:
+    resp = await api.post(
+        "/api/v1/auth/login", json={"username": "admin", "password": "adminpass1"}
+    )
+    assert resp.status_code == 200
+    return str(resp.json()["access_token"])
+
+
+async def test_search_aggregates_fetch_sources(api: AsyncClient) -> None:
+    token = await _login(api)
+    headers = {"Authorization": f"Bearer {token}"}
+
+    resp = await api.get("/api/v1/search", params={"q": "queen"}, headers=headers)
+    assert resp.status_code == 200
+    body = resp.json()
+    assert body["searched_sources"] == ["youtube"]
+    assert len(body["results"]) == 1
+    hit = body["results"][0]
+    assert hit["source"] == "youtube"
+    assert hit["source_id"] == "vid-1"
+    assert hit["title"] == "queen song"
+
+
+async def test_source_scoped_search(api: AsyncClient) -> None:
+    token = await _login(api)
+    headers = {"Authorization": f"Bearer {token}"}
+    resp = await api.get("/api/v1/sources/youtube/search", params={"q": "abba"}, headers=headers)
+    assert resp.status_code == 200
+    assert resp.json()["results"][0]["title"] == "abba song"
+
+
+async def test_download_create_list_and_complete(
+    api: AsyncClient, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    token = await _login(api)
+    headers = {"Authorization": f"Bearer {token}"}
+
+    # Request a download — Redis is absent, so enqueue degrades but the job persists.
+    create = await api.post(
+        "/api/v1/downloads",
+        json={"source": "youtube", "source_id": "vid-1", "query": "queen"},
+        headers=headers,
+    )
+    assert create.status_code == 202
+    body = create.json()
+    assert body["already_in_library"] is False
+    job_id = body["job"]["id"]
+    assert body["job"]["status"] == "queued"
+
+    # It shows up in the listing.
+    listing = await api.get("/api/v1/downloads", headers=headers)
+    assert listing.status_code == 200
+    assert any(j["id"] == job_id for j in listing.json()["items"])
+
+    # A duplicate request returns the same in-flight job, not a new one.
+    dup = await api.post(
+        "/api/v1/downloads",
+        json={"source": "youtube", "source_id": "vid-1"},
+        headers=headers,
+    )
+    assert dup.json()["job"]["id"] == job_id
+
+    # Run the worker task directly (bypasses Redis) with the fake fetch source.
+    import app.workers.tasks.download_task as dl_task
+
+    worker_dl = tmp_path / "worker-dl"
+    worker_dl.mkdir()
+    fake = SourceRegistry([FakeFetchSource(worker_dl)])  # type: ignore[list-item]
+    monkeypatch.setattr(dl_task, "build_source_registry", lambda _settings: fake)
+
+    result = await dl_task.download_track({}, job_id=job_id)
+    assert result["status"] == "done"
+    track_id = result["track_id"]
+
+    # The job is now done and linked to the imported track.
+    got = await api.get(f"/api/v1/downloads/{job_id}", headers=headers)
+    assert got.json()["status"] == "done"
+    assert got.json()["track_id"] == track_id
+
+    # The imported track streams back.
+    stream = await api.get(f"/api/v1/stream/{track_id}", headers=headers)
+    assert stream.status_code == 200
+    assert len(stream.content) > 0
+
+    # A new request for the same item now dedups against the library.
+    again = await api.post(
+        "/api/v1/downloads",
+        json={"source": "youtube", "source_id": "vid-1"},
+        headers=headers,
+    )
+    assert again.json()["already_in_library"] is True
+    assert again.json()["track_id"] == track_id
+
+
+async def test_cancel_download(api: AsyncClient) -> None:
+    token = await _login(api)
+    headers = {"Authorization": f"Bearer {token}"}
+    create = await api.post(
+        "/api/v1/downloads",
+        json={"source": "youtube", "source_id": "vid-cancel"},
+        headers=headers,
+    )
+    job_id = create.json()["job"]["id"]
+
+    cancel = await api.delete(f"/api/v1/downloads/{job_id}", headers=headers)
+    assert cancel.status_code == 204
+
+    got = await api.get(f"/api/v1/downloads/{job_id}", headers=headers)
+    assert got.status_code == 404