feat(sources): local_folder source backend + import pipeline
First ingest path beyond manual upload (plan §1C). Source abstraction +
the first concrete backend, so a homelab can index an existing library.
- domain: SourceBackend/IndexableSource ports + SourceInfo/SourceFile shapes
- infrastructure/sources: LocalFolderSource (walks a mounted dir, idempotent
source_id = relative path) + registry built from settings
- application: LibraryImportService — batch sibling of UploadService; dedup on
(source, source_id), copy into storage, minimal track (metadata_status=pending,
enrichment fills the rest in 1D), per-file failures isolated
- workers: scan_local_folder arq task (registered) + enqueue helper (503 if
Redis down)
- api: GET /sources, POST /sources/{source}/scan (admin, enqueues), /health
- config: LOCAL_MEDIA_IMPORT_PATH; README + .env.example documented
- tests: scanner, registry, import service (fakes) + DB-gated sources API path
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,157 @@
|
||||
"""Unit tests for LibraryImportService — DB-free, in-memory fakes."""
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from app.application.import_service import LibraryImportService
|
||||
from app.domain.entities import Artist, Track
|
||||
from app.domain.sources import SourceFile, SourceInfo
|
||||
|
||||
# Module-wide marker: applies pytest's ``asyncio`` mark to every test in this file.
pytestmark = pytest.mark.asyncio
|
||||
|
||||
|
||||
class FakeArtistRepo:
    """In-memory artist repository keyed by artist name."""

    def __init__(self) -> None:
        # name -> Artist; an entry is created on first lookup.
        self._by_name: dict[str, Artist] = {}

    async def get_or_create(self, name: str) -> Artist:
        """Return the artist stored under *name*, creating it on first use."""
        try:
            return self._by_name[name]
        except KeyError:
            stamp = dt.datetime.now(dt.UTC)
            artist = Artist(id=uuid.uuid4(), name=name, created_at=stamp, updated_at=stamp)
            self._by_name[name] = artist
            return artist
|
||||
|
||||
|
||||
class FakeTrackRepo:
    """In-memory track repository with optional simulated ``add`` failures.

    ``by_source`` indexes tracks by ``(source, source_id)``; ``added`` records
    every track created through :meth:`add`, in call order.
    """

    def __init__(self, *, fail_on: set[str] | None = None) -> None:
        self.by_source: dict[tuple[str, str], Track] = {}
        self.added: list[Track] = []
        # source_ids for which add() raises instead of storing a track.
        self._fail_on = fail_on or set()

    async def get_by_source(self, source: str, source_id: str) -> Track | None:
        """Look up a track by its ``(source, source_id)`` pair; ``None`` if absent."""
        lookup_key = (source, source_id)
        return self.by_source.get(lookup_key)

    async def add(self, **kw: object) -> Track:
        """Build a ``Track`` from keyword fields, store it, and return it.

        Raises:
            RuntimeError: when ``source_id`` is in the configured ``fail_on`` set.
        """
        source_id = str(kw["source_id"])
        if source_id in self._fail_on:
            raise RuntimeError("simulated add failure")

        stamp = dt.datetime.now(dt.UTC)
        # Accept either a ready UUID or anything whose str() parses as one.
        raw_id = kw["id"]
        track_id = raw_id if isinstance(raw_id, uuid.UUID) else uuid.UUID(str(raw_id))
        created = Track(
            id=track_id,
            title=str(kw["title"]),
            artist_id=kw["artist_id"],  # type: ignore[arg-type]
            album_id=None,
            storage_uri=str(kw["storage_uri"]),
            file_format=str(kw["file_format"]),
            file_size=int(kw["file_size"]),  # type: ignore[call-overload]
            source=str(kw["source"]),
            source_id=source_id,
            duration_seconds=None,
            genre=None,
            year=None,
            metadata_status=str(kw["metadata_status"]),
            created_at=stamp,
            updated_at=stamp,
        )
        self.by_source[(created.source, created.source_id)] = created
        self.added.append(created)
        return created
|
||||
|
||||
|
||||
class FakeStorage:
    """Storage double that records calls instead of touching disk.

    ``saved`` maps each storage key to the source path it was saved from;
    ``deleted`` lists the keys passed to :meth:`delete`, in call order.
    """

    def __init__(self) -> None:
        self.saved: dict[str, Path] = dict()
        self.deleted: list[str] = list()

    async def save_file(self, key: str, src_path: Path) -> int:
        """Remember ``key -> src_path`` and return a fixed value of ``1``."""
        self.saved.update({key: src_path})
        return 1

    async def delete(self, key: str) -> None:
        """Record that *key* was asked to be deleted."""
        self.deleted += [key]
|
||||
|
||||
|
||||
class FakeSource:
    """Always-available source named ``"local"`` that replays a fixed file list."""

    name = "local"

    def __init__(self, files: list[SourceFile]) -> None:
        self._files = files

    def info(self) -> SourceInfo:
        """Describe this source as an available, indexable source labelled "Local"."""
        return SourceInfo(
            name=self.name,
            label="Local",
            kind="indexable",
            available=True,
        )

    def is_available(self) -> bool:
        """Report the source as available unconditionally."""
        return True

    def scan(self) -> Iterator[SourceFile]:
        """Yield the files given to the constructor, in order."""
        for entry in self._files:
            yield entry
|
||||
|
||||
|
||||
def _file(source_id: str) -> SourceFile:
    """Build an mp3 ``SourceFile`` under ``/music`` for the given relative id.

    The suggested title is the id's filename stem; the size is a fixed 123.
    """
    relative = Path(source_id)
    absolute = Path("/music") / source_id
    return SourceFile(
        source_id=source_id,
        path=absolute,
        suggested_title=relative.stem,
        file_format="mp3",
        file_size=123,
    )
|
||||
|
||||
|
||||
def _service(tracks: FakeTrackRepo, storage: FakeStorage) -> LibraryImportService:
    """Wire a ``LibraryImportService`` to the given fakes plus a fresh artist repo."""
    artists = FakeArtistRepo()
    return LibraryImportService(tracks=tracks, artists=artists, storage=storage)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
async def test_imports_new_files() -> None:
    """Two unseen files are both imported as pending tracks from the "local" source."""
    tracks = FakeTrackRepo()
    storage = FakeStorage()
    source = FakeSource([_file("a.mp3"), _file("b/c.mp3")])
    service = _service(tracks, storage)

    summary = await service.scan_and_import(source, added_by=None)  # type: ignore[arg-type]

    counts = (summary.seen, summary.imported, summary.skipped, summary.failed)
    assert counts == (2, 2, 0, 0)
    assert len(tracks.added) == 2
    assert len(storage.saved) == 2
    for track in tracks.added:
        assert track.metadata_status == "pending"
    for track in tracks.added:
        assert track.source == "local"
|
||||
|
||||
|
||||
async def test_dedup_skips_already_imported() -> None:
    """A file whose (source, source_id) is already indexed is skipped, not re-copied."""
    tracks = FakeTrackRepo()
    storage = FakeStorage()

    # Pre-seed the repo with a track for ("local", "a.mp3").
    stamp = dt.datetime.now(dt.UTC)
    existing = Track(
        id=uuid.uuid4(),
        title="a",
        artist_id=uuid.uuid4(),
        album_id=None,
        storage_uri="k",
        file_format="mp3",
        file_size=1,
        source="local",
        source_id="a.mp3",
        duration_seconds=None,
        genre=None,
        year=None,
        metadata_status="pending",
        created_at=stamp,
        updated_at=stamp,
    )
    tracks.by_source[("local", "a.mp3")] = existing
    source = FakeSource([_file("a.mp3"), _file("new.mp3")])
    service = _service(tracks, storage)

    summary = await service.scan_and_import(source, added_by=None)  # type: ignore[arg-type]

    assert (summary.imported, summary.skipped) == (1, 1)
    assert len(storage.saved) == 1  # only the new file copied
|
||||
|
||||
|
||||
async def test_per_file_failure_is_isolated_and_rolls_back_storage() -> None:
    """One failing ``add`` does not abort the batch, and triggers one storage delete."""
    storage = FakeStorage()
    tracks = FakeTrackRepo(fail_on={"bad.mp3"})
    source = FakeSource([_file("good.mp3"), _file("bad.mp3")])
    service = _service(tracks, storage)

    summary = await service.scan_and_import(source, added_by=None)  # type: ignore[arg-type]

    counts = (summary.seen, summary.imported, summary.failed)
    assert counts == (2, 1, 1)
    # The failed import's copied file was cleaned up; the good one stays.
    assert len(storage.deleted) == 1
    assert len(tracks.added) == 1
|
||||
@@ -0,0 +1,59 @@
|
||||
"""Unit tests for the local-folder source + registry (no DB, no network)."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.infrastructure.sources.local_folder import LocalFolderSource
|
||||
from app.infrastructure.sources.registry import build_source_registry
|
||||
|
||||
|
||||
def _settings(**overrides: object) -> Settings:
    """Construct ``Settings`` with the given keyword overrides."""
    configured = Settings(**overrides)  # type: ignore[arg-type]
    return configured
|
||||
|
||||
|
||||
def test_scan_discovers_audio_recursively(tmp_path: Path) -> None:
    """Scanning finds audio files in nested folders and ignores non-audio files."""
    nested = tmp_path / "sub"
    nested.mkdir()
    (tmp_path / "a.mp3").write_bytes(b"x")
    (nested / "b.flac").write_bytes(b"yy")
    (tmp_path / "notes.txt").write_bytes(b"ignore me")  # non-audio → skipped

    by_id = {found.source_id: found for found in LocalFolderSource(tmp_path).scan()}

    assert set(by_id) == {"a.mp3", "sub/b.flac"}

    top_level = by_id["a.mp3"]
    assert top_level.file_format == "mp3"
    assert top_level.suggested_title == "a"

    in_subfolder = by_id["sub/b.flac"]
    assert in_subfolder.file_format == "flac"
    assert in_subfolder.file_size == 2
|
||||
|
||||
|
||||
def test_source_id_is_stable_relative_path(tmp_path: Path) -> None:
    """The source_id is the path relative to the root; ``path`` is the full path."""
    audio = tmp_path / "x.opus"
    audio.write_bytes(b"z")

    # Exactly one result is expected; unpacking fails otherwise.
    (only,) = LocalFolderSource(tmp_path).scan()

    assert only.source_id == "x.opus"
    assert only.path == audio
|
||||
|
||||
|
||||
def test_is_available_false_when_missing(tmp_path: Path) -> None:
    """A source rooted at a nonexistent directory is unavailable and scans empty."""
    missing_root = tmp_path / "nope"
    source = LocalFolderSource(missing_root)

    assert source.is_available() is False
    # scanning an unavailable source yields nothing
    assert list(source.scan()) == []
|
||||
|
||||
|
||||
def test_info_reports_kind_and_availability(tmp_path: Path) -> None:
    """``info()`` on an existing folder reports name, kind and availability."""
    described = LocalFolderSource(tmp_path).info()

    assert described.name == "local"
    assert described.kind == "indexable"
    assert described.available is True
|
||||
|
||||
|
||||
def test_registry_registers_local_when_path_set(tmp_path: Path) -> None:
    """With ``local_media_import_path`` set, the registry holds only "local"."""
    settings = _settings(local_media_import_path=tmp_path)
    registry = build_source_registry(settings)

    registered = {entry.name for entry in registry.infos()}

    assert registered == {"local"}
    assert registry.indexable("local").is_available() is True
|
||||
|
||||
|
||||
def test_registry_empty_when_path_unset() -> None:
    """With ``local_media_import_path=None`` the registry lists no sources."""
    settings = _settings(local_media_import_path=None)
    registry = build_source_registry(settings)

    assert registry.infos() == []
|
||||
@@ -0,0 +1,151 @@
|
||||
"""Integration tests for sources: enumeration + the real import path.
|
||||
|
||||
Requires a reachable Postgres; skips otherwise. The scan worker task is invoked
|
||||
directly (no Redis needed) so the full DB + storage import path is covered.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from collections.abc import AsyncIterator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from app.core.config import get_settings
|
||||
from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope
|
||||
from app.infrastructure.db.repositories import (
|
||||
SqlAlchemyRefreshTokenRepository,
|
||||
SqlAlchemyUserRepository,
|
||||
)
|
||||
from asgi_lifespan import LifespanManager
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
# Module-wide marker: applies pytest's ``asyncio`` mark to every test in this file.
pytestmark = pytest.mark.asyncio
|
||||
|
||||
# Memoized probe result; ``None`` means the database has not been probed yet.
_db_reachable_cache: bool | None = None


async def _db_reachable() -> bool:
    """Return whether a ``SELECT 1`` against the engine succeeds within 3 seconds.

    The outcome of the first probe is cached in ``_db_reachable_cache`` and
    returned unchanged by every later call.
    """
    global _db_reachable_cache
    if _db_reachable_cache is None:
        from sqlalchemy import text

        try:
            async with asyncio.timeout(3):
                async with get_engine().connect() as conn:
                    await conn.execute(text("SELECT 1"))
        except Exception:
            # Any failure (including the timeout) counts as "not reachable".
            _db_reachable_cache = False
        else:
            _db_reachable_cache = True
    return _db_reachable_cache
|
||||
|
||||
|
||||
@pytest.fixture
async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]:
    """Yield an HTTP client bound to a freshly built app with a seeded library.

    Setup:
      * skips the test when Postgres is not reachable;
      * creates ``media`` and ``music`` directories under ``tmp_path`` and fills
        ``music`` with two audio files plus one non-audio file;
      * points ``MEDIA_PATH`` / ``LOCAL_MEDIA_IMPORT_PATH`` at those directories
        and clears the cached settings and the cached storage provider;
      * drops and recreates all tables, then creates a superuser ``admin``;
      * builds the app and serves it in-process through ``ASGITransport``.

    Teardown drops all tables, disposes the engine, and restores the two
    environment variables to whatever they held before the fixture ran
    (removing them only if they were previously unset).
    """
    if not await _db_reachable():
        pytest.skip("Postgres not reachable — integration test skipped.")

    media = tmp_path / "media"
    music = tmp_path / "music"
    media.mkdir()
    music.mkdir()
    # Two audio files (+ a non-audio file that must be ignored) in a subfolder.
    (music / "one.mp3").write_bytes(b"first track bytes" * 8)
    (music / "artist").mkdir()
    (music / "artist" / "two.flac").write_bytes(b"second track bytes" * 8)
    (music / "cover.txt").write_bytes(b"not audio")

    env_overrides = {
        "MEDIA_PATH": str(media),
        "LOCAL_MEDIA_IMPORT_PATH": str(music),
    }
    # Snapshot prior values so teardown restores them instead of deleting
    # variables the surrounding environment may have set on purpose.
    previous_env = {key: os.environ.get(key) for key in env_overrides}
    os.environ.update(env_overrides)
    get_settings.cache_clear()

    import app.infrastructure.storage.provider as _storage_provider

    # Reset the module-level storage object; presumably it is rebuilt lazily
    # from the new settings — confirm against the provider module.
    _storage_provider._storage = None

    try:
        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
            await conn.run_sync(Base.metadata.create_all)

        from app.application.user_service import UserService
        from app.core.security import Argon2PasswordHasher

        async with session_scope() as session:
            await UserService(
                users=SqlAlchemyUserRepository(session),
                refresh_tokens=SqlAlchemyRefreshTokenRepository(session),
                hasher=Argon2PasswordHasher(),
            ).create_user(username="admin", password="adminpass1", is_superuser=True)

        from app.main import create_app

        app = create_app()
        async with LifespanManager(app):
            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://test") as client:
                yield client

        async with get_engine().begin() as conn:
            await conn.run_sync(Base.metadata.drop_all)
        await dispose_engine()
    finally:
        _storage_provider._storage = None
        for key, prior in previous_env.items():
            if prior is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = prior
        get_settings.cache_clear()
|
||||
|
||||
|
||||
async def _login(api: AsyncClient) -> str:
    """Log in as the seeded ``admin`` user and return the access token string."""
    credentials = {"username": "admin", "password": "adminpass1"}
    resp = await api.post("/api/v1/auth/login", json=credentials)
    assert resp.status_code == 200
    payload = resp.json()
    return str(payload["access_token"])
|
||||
|
||||
|
||||
async def test_list_sources_includes_local(api: AsyncClient) -> None:
    """``GET /api/v1/sources`` lists "local" as an available, indexable source."""
    token = await _login(api)
    auth = {"Authorization": f"Bearer {token}"}

    resp = await api.get("/api/v1/sources", headers=auth)

    assert resp.status_code == 200
    by_name = {entry["name"]: entry for entry in resp.json()}
    assert "local" in by_name
    local = by_name["local"]
    assert local["available"] is True
    assert local["kind"] == "indexable"
|
||||
|
||||
|
||||
async def test_local_import_creates_streamable_tracks(api: AsyncClient) -> None:
    """A scan imports both audio files, a rescan imports none, and a track streams."""
    token = await _login(api)
    headers = {"Authorization": f"Bearer {token}"}

    # Run the worker task directly (bypasses Redis); it imports against the DB.
    from app.workers.tasks.import_task import scan_local_folder

    first_run = await scan_local_folder({}, source="local", added_by=None)
    assert first_run["seen"] == 2
    assert first_run["imported"] == 2
    assert first_run["failed"] == 0

    # A second run is idempotent — everything already indexed.
    second_run = await scan_local_folder({}, source="local", added_by=None)
    assert second_run["imported"] == 0
    assert second_run["skipped"] == 2

    listing = await api.get("/api/v1/tracks", headers=headers)
    assert listing.status_code == 200
    items = listing.json()["items"]
    assert len(items) == 2
    assert {item["title"] for item in items} == {"one", "two"}

    # And the imported file actually streams back.
    first_id = items[0]["id"]
    stream = await api.get(f"/api/v1/stream/{first_id}", headers=headers)
    assert stream.status_code == 200
    assert len(stream.content) > 0
|
||||
|
||||
|
||||
async def test_scan_requires_admin(api: AsyncClient) -> None:
    """A scan request sent without credentials is rejected with 401."""
    # The scan endpoint enqueues to Redis; here we only assert it's admin-gated.
    scan_url = "/api/v1/sources/local/scan"
    resp = await api.post(scan_url)
    assert resp.status_code == 401
|
||||
Reference in New Issue
Block a user