feat(sources): YouTube Music search + download pipeline (§1C/§1E)
Docker Build & Publish / build (push) Successful in 2m39s
Docker Build & Publish / push (push) Failing after 36s
Docker Build & Publish / Prune old image versions (push) Has been skipped

Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 14:04:33 +03:00
parent ea880edd57
commit 78007461e1
32 changed files with 2645 additions and 819 deletions
+1 -3
View File
@@ -79,9 +79,7 @@ async def _create_test_db_if_missing() -> None:
except Exception:
return
try:
exists = await conn.fetchval(
"SELECT 1 FROM pg_database WHERE datname = $1", _TEST_DB_NAME
)
exists = await conn.fetchval("SELECT 1 FROM pg_database WHERE datname = $1", _TEST_DB_NAME)
if not exists:
# CREATE DATABASE can't run inside a transaction; asyncpg's implicit
# autocommit on a bare connection handles that.
+213
View File
@@ -0,0 +1,213 @@
"""Unit tests for DownloadService — DB-free, in-memory fakes."""
import datetime as dt
import uuid
from pathlib import Path
import pytest
from app.application.download_service import DownloadService
from app.domain.entities import Artist, Track
from app.domain.entities.download import DownloadJob
from app.domain.sources import DownloadResult
pytestmark = pytest.mark.asyncio
class FakeArtistRepo:
    """Artist repository double that mints a brand-new Artist on every call."""

    async def get_or_create(self, name: str) -> Artist:
        """Return a new Artist carrying *name*, a random id and "now" timestamps."""
        stamp = dt.datetime.now(dt.UTC)
        artist_id = uuid.uuid4()
        return Artist(id=artist_id, name=name, created_at=stamp, updated_at=stamp)
class FakeTrackRepo:
    """Dictionary-backed track repository double.

    ``by_source`` indexes tracks by ``(source, source_id)``; ``added`` lists
    every track created through :meth:`add`, in call order.
    """

    def __init__(self) -> None:
        self.added: list[Track] = []
        self.by_source: dict[tuple[str, str], Track] = {}

    async def get_by_source(self, source: str, source_id: str) -> Track | None:
        """Return the track stored under ``(source, source_id)``, or ``None``."""
        lookup_key = (source, source_id)
        return self.by_source.get(lookup_key)

    async def add(self, **kw: object) -> Track:
        """Build a Track from the keyword fields, record it, and return it."""
        stamp = dt.datetime.now(dt.UTC)
        created = Track(
            id=kw["id"],  # type: ignore[arg-type]
            title=str(kw["title"]),
            artist_id=kw["artist_id"],  # type: ignore[arg-type]
            album_id=None,
            storage_uri=str(kw["storage_uri"]),
            file_format=str(kw["file_format"]),
            file_size=int(kw["file_size"]),  # type: ignore[call-overload]
            source=str(kw["source"]),
            source_id=str(kw["source_id"]),
            duration_seconds=None,
            genre=None,
            year=None,
            track_number=None,
            metadata_status=str(kw["metadata_status"]),
            metadata_error=None,
            enriched_at=None,
            created_at=stamp,
            updated_at=stamp,
        )
        # Index first, then log, mirroring what the dedup lookups rely on.
        lookup_key = (created.source, created.source_id)
        self.by_source[lookup_key] = created
        self.added.append(created)
        return created
class FakeStorage:
    """Storage double that only records which keys were saved and deleted."""

    def __init__(self) -> None:
        self.deleted: list[str] = []
        self.saved: dict[str, Path] = {}

    async def save_file(self, key: str, src_path: Path) -> int:
        """Remember *src_path* under *key* and report a fixed size of 1."""
        self.saved.update({key: src_path})
        return 1

    async def delete(self, key: str) -> None:
        """Log *key* as deleted; previously saved entries are left untouched."""
        self.deleted += [key]
class FakeJobRepo:
    """In-memory download-job repository double.

    ``jobs`` holds every job created via :meth:`add`, keyed by id. ``active``
    is populated by tests directly to simulate an in-flight job for a given
    ``(source, source_id)`` pair; :meth:`add` never touches it.
    """

    def __init__(self) -> None:
        self.active: dict[tuple[str, str], DownloadJob] = {}
        self.jobs: dict[uuid.UUID, DownloadJob] = {}

    def _make(self, **kw: object) -> DownloadJob:
        """Create a queued job with a random id from the given keyword fields."""
        stamp = dt.datetime.now(dt.UTC)
        return DownloadJob(
            id=uuid.uuid4(),
            source=str(kw["source"]),
            source_id=kw.get("source_id"),  # type: ignore[arg-type]
            query=kw.get("query"),  # type: ignore[arg-type]
            requested_by=kw.get("requested_by"),  # type: ignore[arg-type]
            status="queued",
            progress=0.0,
            error_message=None,
            retry_count=0,
            track_id=None,
            created_at=stamp,
            updated_at=stamp,
        )

    async def add(self, **kw: object) -> DownloadJob:
        """Create a job, store it under its id, and return it."""
        created = self._make(**kw)
        self.jobs[created.id] = created
        return created

    async def get_by_id(self, job_id: uuid.UUID) -> DownloadJob | None:
        """Return the stored job with *job_id*, or ``None``."""
        return self.jobs.get(job_id)

    async def get_active_for_source(self, source: str, source_id: str) -> DownloadJob | None:
        """Return the job registered in ``active`` for the pair, or ``None``."""
        lookup_key = (source, source_id)
        return self.active.get(lookup_key)

    async def set_status(self, job_id: uuid.UUID, **kw: object) -> None:
        """Accept a status update and discard it (intentional no-op)."""
        return None

    async def delete(self, job_id: uuid.UUID) -> None:
        """Drop the job with *job_id* if present; unknown ids are ignored."""
        self.jobs.pop(job_id, None)
def _service(
    *, jobs: FakeJobRepo, tracks: FakeTrackRepo, storage: FakeStorage, enqueued: list[uuid.UUID]
) -> DownloadService:
    """Assemble a DownloadService over the given fakes.

    The enqueue hook handed to the service appends each job id to *enqueued*,
    so tests can assert exactly which jobs were scheduled.
    """

    async def _record_enqueue(job_id: uuid.UUID) -> None:
        enqueued.append(job_id)

    artist_repo = FakeArtistRepo()
    return DownloadService(
        jobs=jobs,  # type: ignore[arg-type]
        tracks=tracks,  # type: ignore[arg-type]
        artists=artist_repo,  # type: ignore[arg-type]
        storage=storage,  # type: ignore[arg-type]
        enqueue_download=_record_enqueue,
    )
def _track(source: str, source_id: str) -> Track:
    """Return a minimal placeholder Track for the given source pair.

    Only ``source`` and ``source_id`` vary; ids are random and every other
    field carries a fixed dummy value.
    """
    stamp = dt.datetime.now(dt.UTC)
    track_id = uuid.uuid4()
    artist_id = uuid.uuid4()
    return Track(
        id=track_id,
        title="t",
        artist_id=artist_id,
        album_id=None,
        storage_uri="k",
        file_format="mp3",
        file_size=1,
        source=source,
        source_id=source_id,
        duration_seconds=None,
        genre=None,
        year=None,
        track_number=None,
        metadata_status="pending",
        metadata_error=None,
        enriched_at=None,
        created_at=stamp,
        updated_at=stamp,
    )
async def test_request_dedups_against_library() -> None:
    """A request for an item already in the library creates and enqueues nothing."""
    job_repo = FakeJobRepo()
    track_repo = FakeTrackRepo()
    blob_store = FakeStorage()
    enqueued: list[uuid.UUID] = []
    track_repo.by_source[("youtube", "abc")] = _track("youtube", "abc")
    service = _service(jobs=job_repo, tracks=track_repo, storage=blob_store, enqueued=enqueued)

    outcome = await service.request(
        source="youtube", source_id="abc", query=None, requested_by=None
    )

    assert outcome.already_in_library is True
    assert outcome.track_id is not None
    assert outcome.job is None
    assert enqueued == []  # nothing enqueued
async def test_request_returns_existing_active_job() -> None:
    """A request matching an in-flight job returns that job without re-enqueueing."""
    job_repo = FakeJobRepo()
    track_repo = FakeTrackRepo()
    blob_store = FakeStorage()
    enqueued: list[uuid.UUID] = []
    in_flight = await job_repo.add(
        source="youtube", source_id="abc", query=None, requested_by=None
    )
    job_repo.active[("youtube", "abc")] = in_flight
    service = _service(jobs=job_repo, tracks=track_repo, storage=blob_store, enqueued=enqueued)

    outcome = await service.request(
        source="youtube", source_id="abc", query=None, requested_by=None
    )

    assert outcome.already_in_library is False
    assert outcome.job is not None
    assert outcome.job.id == in_flight.id
    assert enqueued == []  # not re-enqueued
async def test_request_creates_and_enqueues_new_job() -> None:
    """With no library hit and no active job, a new job is created and enqueued once."""
    job_repo = FakeJobRepo()
    track_repo = FakeTrackRepo()
    blob_store = FakeStorage()
    enqueued: list[uuid.UUID] = []
    service = _service(jobs=job_repo, tracks=track_repo, storage=blob_store, enqueued=enqueued)

    outcome = await service.request(
        source="youtube", source_id="abc", query="bohemian", requested_by=None
    )

    assert outcome.already_in_library is False
    assert outcome.job is not None
    assert enqueued == [outcome.job.id]
async def test_store_result_imports_and_cleans_temp(tmp_path: Path) -> None:
    """store_result imports the downloaded file as a track and removes the temp file.

    The temp file's suffix and the declared ``file_format`` are derived from a
    single value, and ``file_size`` is read from disk, so the DownloadResult
    handed to the service is internally consistent.
    """
    jobs, tracks, storage, enq = FakeJobRepo(), FakeTrackRepo(), FakeStorage(), []
    svc = _service(jobs=jobs, tracks=tracks, storage=storage, enqueued=enq)

    # One source of truth for the format keeps the path suffix and the declared
    # format from drifting apart (previously "abc.webm" vs file_format="m4a").
    file_format = "m4a"
    audio = tmp_path / f"abc.{file_format}"
    audio.write_bytes(b"audio" * 20)
    result = DownloadResult(
        source_id="abc",
        path=audio,
        file_format=file_format,
        file_size=audio.stat().st_size,  # 100 bytes, measured rather than hard-coded
        bitrate=160,
        suggested_title="Bohemian Rhapsody",
    )

    track_id = await svc.store_result(source="youtube", result=result, requested_by=None)

    assert len(tracks.added) == 1
    stored = tracks.added[0]
    assert stored.id == track_id
    assert stored.source == "youtube"
    assert stored.source_id == "abc"
    assert stored.metadata_status == "pending"
    assert stored.title == "Bohemian Rhapsody"
    assert len(storage.saved) == 1
    assert not audio.exists()  # temp file removed
+252
View File
@@ -0,0 +1,252 @@
"""Integration tests for downloads + external search.
Requires a reachable Postgres; skips otherwise. The download worker task is
invoked directly (no Redis needed) against a fake fetch source, so the full
DB + storage import path is covered without touching the network.
"""
import asyncio
import os
from collections.abc import AsyncIterator
from pathlib import Path
from typing import Any
import pytest
from app.core.config import get_settings
from app.domain.sources import KIND_FETCH, DownloadResult, SearchResult, SourceInfo
from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope
from app.infrastructure.db.repositories import (
SqlAlchemyRefreshTokenRepository,
SqlAlchemyUserRepository,
)
from app.infrastructure.sources.registry import SourceRegistry
from asgi_lifespan import LifespanManager
from httpx import ASGITransport, AsyncClient
pytestmark = pytest.mark.asyncio
_db_reachable_cache: bool | None = None
async def _db_reachable() -> bool:
    """Report whether the database answers ``SELECT 1`` within three seconds.

    The probe runs at most once per process; its outcome is memoised in the
    module-level ``_db_reachable_cache`` and returned on every later call.
    Any exception during the probe (including the timeout) counts as
    "not reachable".
    """
    global _db_reachable_cache
    if _db_reachable_cache is None:
        from sqlalchemy import text

        try:
            async with asyncio.timeout(3):
                async with get_engine().connect() as conn:
                    await conn.execute(text("SELECT 1"))
        except Exception:
            _db_reachable_cache = False
        else:
            _db_reachable_cache = True
    return _db_reachable_cache
class FakeFetchSource:
    """A searchable + fetchable source that writes a local file (no network).

    Registered under the name ``"youtube"`` so it stands in for the real
    source. ``fetch`` writes its output into the directory given at
    construction; that directory must exist before ``fetch`` is called.
    """

    name = "youtube"

    def __init__(self, tmp_dir: Path) -> None:
        self._tmp_dir = tmp_dir

    def info(self) -> SourceInfo:
        """Describe this source as an always-available fetch source."""
        return SourceInfo(name=self.name, label="YouTube Music", kind=KIND_FETCH, available=True)

    def is_available(self) -> bool:
        """Always available: nothing external is required."""
        return True

    async def search(self, query: str, *, limit: int) -> list[SearchResult]:
        """Return one canned hit whose title echoes *query*; *limit* is ignored."""
        return [
            SearchResult(
                source=self.name,
                source_id="vid-1",
                title=f"{query} song",
                artist="Some Artist",
                album="Some Album",
                duration_seconds=200,
                thumbnail_url="http://img/large.jpg",
            )
        ]

    async def fetch(self, source_id: str, *, on_progress: Any = None) -> DownloadResult:
        """Write a small fake audio file and describe it as a DownloadResult.

        When *on_progress* is provided it is awaited once with ``0.5``.
        """
        # Derive the file suffix from the reported format so the two cannot
        # disagree (previously the file was "<id>.m4a" but reported as "webm").
        file_format = "webm"
        path = self._tmp_dir / f"{source_id}.{file_format}"
        path.write_bytes(b"downloaded audio bytes" * 8)
        if on_progress is not None:
            await on_progress(0.5)
        return DownloadResult(
            source_id=source_id,
            path=path,
            file_format=file_format,
            file_size=path.stat().st_size,
            bitrate=160,
            suggested_title=f"Title for {source_id}",
        )

    async def get_metadata(self, source_id: str) -> None:
        """This fake provides no extra metadata."""
        return None
@pytest.fixture
async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a freshly provisioned application.

    Setup: skips when Postgres is unreachable, points ``MEDIA_PATH`` at a
    per-test directory, resets the cached settings and storage provider,
    recreates the schema, creates the ``admin`` superuser, and overrides the
    source registry with a :class:`FakeFetchSource` so no network is touched.

    Teardown: drops the schema, disposes the engine, resets the storage
    provider, and restores ``MEDIA_PATH`` to whatever it was before the
    fixture ran (removing it only if it was previously unset).
    """
    if not await _db_reachable():
        pytest.skip("Postgres not reachable — integration test skipped.")

    media = tmp_path / "media"
    media.mkdir()
    # Remember any pre-existing value so teardown restores it instead of
    # unconditionally deleting a variable the surrounding environment set.
    previous_media_path = os.environ.get("MEDIA_PATH")
    os.environ["MEDIA_PATH"] = str(media)
    get_settings.cache_clear()

    import app.infrastructure.storage.provider as _storage_provider

    _storage_provider._storage = None

    try:
        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
            await conn.run_sync(Base.metadata.create_all)

        from app.application.user_service import UserService
        from app.core.security import Argon2PasswordHasher

        async with session_scope() as session:
            await UserService(
                users=SqlAlchemyUserRepository(session),
                refresh_tokens=SqlAlchemyRefreshTokenRepository(session),
                hasher=Argon2PasswordHasher(),
            ).create_user(username="admin", password="adminpass1", is_superuser=True)

        from app.api.deps import get_source_registry
        from app.main import create_app

        app = create_app()

        # Inject a fake fetch source so search/download never hit the network.
        # The directory is created first so the source never holds a path
        # that does not exist yet.
        download_dir = tmp_path / "dl"
        download_dir.mkdir()
        fake_registry = SourceRegistry([FakeFetchSource(download_dir)])  # type: ignore[list-item]
        app.dependency_overrides[get_source_registry] = lambda: fake_registry

        async with LifespanManager(app):
            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                yield client

        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
    finally:
        try:
            # Dispose even when setup or teardown raised, so the engine does
            # not leak into later tests.
            await dispose_engine()
        finally:
            _storage_provider._storage = None
            if previous_media_path is None:
                os.environ.pop("MEDIA_PATH", None)
            else:
                os.environ["MEDIA_PATH"] = previous_media_path
            get_settings.cache_clear()
async def _login(api: AsyncClient) -> str:
    """Log in as the fixture's ``admin`` user and return the access token."""
    credentials = {"username": "admin", "password": "adminpass1"}
    resp = await api.post("/api/v1/auth/login", json=credentials)
    assert resp.status_code == 200
    access_token = resp.json()["access_token"]
    return str(access_token)
async def test_search_aggregates_fetch_sources(api: AsyncClient) -> None:
    """Global search reports the queried source and returns its single hit."""
    auth = {"Authorization": f"Bearer {await _login(api)}"}

    resp = await api.get("/api/v1/search", params={"q": "queen"}, headers=auth)

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["searched_sources"] == ["youtube"]
    hits = payload["results"]
    assert len(hits) == 1
    first = hits[0]
    assert first["source"] == "youtube"
    assert first["source_id"] == "vid-1"
    assert first["title"] == "queen song"
async def test_source_scoped_search(api: AsyncClient) -> None:
    """Searching through the per-source endpoint returns that source's hit."""
    auth = {"Authorization": f"Bearer {await _login(api)}"}

    resp = await api.get(
        "/api/v1/sources/youtube/search",
        params={"q": "abba"},
        headers=auth,
    )

    assert resp.status_code == 200
    first_hit = resp.json()["results"][0]
    assert first_hit["title"] == "abba song"
async def test_download_create_list_and_complete(
    api: AsyncClient, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Walk one download through its whole lifecycle over the HTTP API.

    Steps, in order: create a job, see it in the listing, confirm a duplicate
    request maps to the same job, run the worker task in-process against a
    fake fetch source, confirm the job is done and linked to a track, stream
    that track, and finally confirm a fresh request dedups against the
    library. Each step depends on state left by the previous one.
    """
    token = await _login(api)
    headers = {"Authorization": f"Bearer {token}"}
    # Request a download — Redis is absent, so enqueue degrades but the job persists.
    create = await api.post(
        "/api/v1/downloads",
        json={"source": "youtube", "source_id": "vid-1", "query": "queen"},
        headers=headers,
    )
    assert create.status_code == 202
    body = create.json()
    assert body["already_in_library"] is False
    job_id = body["job"]["id"]
    assert body["job"]["status"] == "queued"
    # It shows up in the listing.
    listing = await api.get("/api/v1/downloads", headers=headers)
    assert listing.status_code == 200
    assert any(j["id"] == job_id for j in listing.json()["items"])
    # A duplicate request returns the same in-flight job, not a new one.
    dup = await api.post(
        "/api/v1/downloads",
        json={"source": "youtube", "source_id": "vid-1"},
        headers=headers,
    )
    assert dup.json()["job"]["id"] == job_id
    # Run the worker task directly (bypasses Redis) with the fake fetch source.
    import app.workers.tasks.download_task as dl_task
    # The worker gets its own download directory and registry: it builds its
    # registry through build_source_registry, which is patched here, rather
    # than through the app's dependency override.
    worker_dl = tmp_path / "worker-dl"
    worker_dl.mkdir()
    fake = SourceRegistry([FakeFetchSource(worker_dl)])  # type: ignore[list-item]
    monkeypatch.setattr(dl_task, "build_source_registry", lambda _settings: fake)
    # The first argument is the worker context; an empty dict is passed here.
    result = await dl_task.download_track({}, job_id=job_id)
    assert result["status"] == "done"
    track_id = result["track_id"]
    # The job is now done and linked to the imported track.
    got = await api.get(f"/api/v1/downloads/{job_id}", headers=headers)
    assert got.json()["status"] == "done"
    assert got.json()["track_id"] == track_id
    # The imported track streams back.
    stream = await api.get(f"/api/v1/stream/{track_id}", headers=headers)
    assert stream.status_code == 200
    assert len(stream.content) > 0
    # A new request for the same item now dedups against the library.
    again = await api.post(
        "/api/v1/downloads",
        json={"source": "youtube", "source_id": "vid-1"},
        headers=headers,
    )
    assert again.json()["already_in_library"] is True
    assert again.json()["track_id"] == track_id
async def test_cancel_download(api: AsyncClient) -> None:
    """Deleting a queued download answers 204 and the job is gone afterwards."""
    auth = {"Authorization": f"Bearer {await _login(api)}"}
    request_body = {"source": "youtube", "source_id": "vid-cancel"}

    created = await api.post("/api/v1/downloads", json=request_body, headers=auth)
    job_id = created.json()["job"]["id"]
    job_url = f"/api/v1/downloads/{job_id}"

    cancelled = await api.delete(job_url, headers=auth)
    assert cancelled.status_code == 204

    lookup = await api.get(job_url, headers=auth)
    assert lookup.status_code == 404
+7 -2
View File
@@ -48,12 +48,17 @@ def test_info_reports_kind_and_availability(tmp_path: Path) -> None:
def test_registry_registers_local_when_path_set(tmp_path: Path) -> None:
registry = build_source_registry(_settings(local_media_import_path=tmp_path))
# Disable youtube to isolate the local-source registration under test.
registry = build_source_registry(
_settings(local_media_import_path=tmp_path, youtube_enabled=False)
)
names = {info.name for info in registry.infos()}
assert names == {"local"}
assert registry.indexable("local").is_available() is True
def test_registry_empty_when_path_unset() -> None:
registry = build_source_registry(_settings(local_media_import_path=None))
registry = build_source_registry(
_settings(local_media_import_path=None, youtube_enabled=False)
)
assert registry.infos() == []
+1 -3
View File
@@ -107,9 +107,7 @@ async def test_track_out_includes_genre_year_track_number(api: AsyncClient) -> N
token = await _login(api)
track_id = await _upload(api, token)
resp = await api.get(
f"/api/v1/tracks/{track_id}", headers={"Authorization": f"Bearer {token}"}
)
resp = await api.get(f"/api/v1/tracks/{track_id}", headers={"Authorization": f"Bearer {token}"})
assert resp.status_code == 200, resp.text
body = resp.json()
assert "genre" in body
+135
View File
@@ -0,0 +1,135 @@
"""Unit tests for YouTubeMusicSource + registry (no network, injected libs)."""
from pathlib import Path
from typing import Any
import pytest
from app.core.config import Settings
from app.domain.sources import KIND_FETCH
from app.infrastructure.sources.registry import build_source_registry
from app.infrastructure.sources.youtube import YouTubeMusicSource
pytestmark = pytest.mark.asyncio
def _song_row(**overrides: Any) -> dict[str, Any]:
row: dict[str, Any] = {
"videoId": "abc123",
"title": "Bohemian Rhapsody",
"artists": [{"name": "Queen", "id": "a1"}],
"album": {"name": "A Night at the Opera", "id": "al1"},
"duration_seconds": 354,
"thumbnails": [
{"url": "http://img/small.jpg", "width": 60, "height": 60},
{"url": "http://img/large.jpg", "width": 240, "height": 240},
],
}
row.update(overrides)
return row
def _settings(**overrides: object) -> Settings:
    """Construct a Settings object from keyword overrides."""
    config = Settings(**overrides)  # type: ignore[arg-type]
    return config
async def test_search_maps_ytmusic_rows() -> None:
    """A complete ytmusic row is mapped field by field onto the search result."""

    def _one_row(q: str, limit: int) -> list[dict[str, Any]]:
        return [_song_row()]

    source = YouTubeMusicSource(search_fn=_one_row)

    hits = await source.search("queen", limit=10)
    [hit] = hits

    assert hit.source == "youtube"
    assert hit.source_id == "abc123"
    assert hit.title == "Bohemian Rhapsody"
    assert hit.artist == "Queen"
    assert hit.album == "A Night at the Opera"
    assert hit.duration_seconds == 354
    assert hit.thumbnail_url == "http://img/large.jpg"  # last (largest)
async def test_search_joins_multiple_artists_and_tolerates_missing_fields() -> None:
    """Artists are comma-joined; absent album/thumbnail/duration map to ``None``."""
    sparse_row = _song_row(
        artists=[{"name": "Queen"}, {"name": "David Bowie"}],
        album=None,
        thumbnails=[],
        duration_seconds=None,
    )

    def _sparse(q: str, limit: int) -> list[dict[str, Any]]:
        return [sparse_row]

    source = YouTubeMusicSource(search_fn=_sparse)

    hits = await source.search("under pressure", limit=10)
    [hit] = hits

    assert hit.artist == "Queen, David Bowie"
    assert hit.album is None
    assert hit.thumbnail_url is None
    assert hit.duration_seconds is None
async def test_search_drops_rows_without_video_id() -> None:
    """Rows lacking a videoId are filtered out; the order of the rest is kept."""
    canned = [_song_row(), _song_row(videoId=None), _song_row(videoId="xyz")]

    def _canned(q: str, limit: int) -> list[dict[str, Any]]:
        return canned

    source = YouTubeMusicSource(search_fn=_canned)

    hits = await source.search("q", limit=10)

    returned_ids = [hit.source_id for hit in hits]
    assert returned_ids == ["abc123", "xyz"]
async def test_search_empty_query_short_circuits() -> None:
    """A whitespace-only query yields no results and never calls the search function."""
    seen_queries: list[str] = []

    def _spy(q: str, limit: int) -> list[dict[str, Any]]:
        seen_queries.append(q)
        return []

    source = YouTubeMusicSource(search_fn=_spy)

    assert await source.search(" ", limit=10) == []
    assert seen_queries == []
async def test_search_degrades_to_empty_on_error() -> None:
    """An exception from the search function is absorbed into an empty result."""

    def _failing(q: str, limit: int) -> list[dict[str, Any]]:
        raise RuntimeError("service down")

    source = YouTubeMusicSource(search_fn=_failing)

    hits = await source.search("q", limit=10)

    assert hits == []
async def test_fetch_maps_download_result(tmp_path: Path) -> None:
    """fetch() turns the download function's dict into a DownloadResult."""
    payload = b"opus-bytes" * 10
    audio = tmp_path / "abc123.m4a"
    audio.write_bytes(payload)

    def _fake_download(
        video_id: str, tmp_dir: Path, hook: Any, cookies: Path | None
    ) -> dict[str, Any]:
        return {
            "filepath": audio,
            "file_format": "m4a",
            "bitrate": 160,
            "title": "Bohemian Rhapsody",
        }

    source = YouTubeMusicSource(download_fn=_fake_download)

    fetched = await source.fetch("abc123")

    assert fetched.source_id == "abc123"
    assert fetched.path == audio
    assert fetched.file_format == "m4a"
    assert fetched.file_size == len(payload)
    assert fetched.bitrate == 160
    assert fetched.suggested_title == "Bohemian Rhapsody"
async def test_info_and_availability_with_injected_fn() -> None:
    """A source built with an injected search function reports itself as available."""

    def _no_results(q: str, limit: int) -> list[dict[str, Any]]:
        return []

    described = YouTubeMusicSource(search_fn=_no_results).info()

    assert described.name == "youtube"
    assert described.kind == KIND_FETCH
    assert described.available is True  # injected fn → treated as available
async def test_registry_registers_youtube_when_enabled() -> None:
    """With youtube enabled the registry exposes it as searchable and fetchable."""
    settings = _settings(youtube_enabled=True)
    registry = build_source_registry(settings)

    registered = {info.name for info in registry.infos()}
    assert "youtube" in registered

    # youtube is searchable + fetchable, not indexable
    searchable = registry.searchable("youtube")
    assert searchable.name == "youtube"
    fetchable = registry.fetchable("youtube")
    assert fetchable.name == "youtube"
async def test_registry_omits_youtube_when_disabled() -> None:
    """With youtube disabled the registry does not list it."""
    settings = _settings(youtube_enabled=False)
    registry = build_source_registry(settings)

    registered = {info.name for info in registry.infos()}

    assert "youtube" not in registered