feat(sources): local_folder source backend + import pipeline
Docker Build & Publish / build (push) Has been cancelled
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled

First ingest path beyond manual upload (plan §1C). Source abstraction +
the first concrete backend, so a homelab can index an existing library.

- domain: SourceBackend/IndexableSource ports + SourceInfo/SourceFile shapes
- infrastructure/sources: LocalFolderSource (walks a mounted dir, idempotent
  source_id = relative path) + registry built from settings
- application: LibraryImportService — batch sibling of UploadService; dedup on
  (source, source_id), copy into storage, minimal track (metadata_status=pending,
  enrichment fills the rest in 1D), per-file failures isolated
- workers: scan_local_folder arq task (registered) + enqueue helper (503 if
  Redis down)
- api: GET /sources, POST /sources/{source}/scan (admin, enqueues), /health
- config: LOCAL_MEDIA_IMPORT_PATH; README + .env.example documented
- tests: scanner, registry, import service (fakes) + DB-gated sources API path

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-08 20:02:09 +03:00
parent 551afbab13
commit 48e3418c7f
19 changed files with 800 additions and 11 deletions
+2 -5
View File
@@ -10,6 +10,7 @@ from arq.connections import RedisSettings
from app.core.config import get_settings
from app.core.logging import configure_logging, get_logger
from app.workers.tasks.import_task import scan_local_folder
log = get_logger("worker")
@@ -24,12 +25,8 @@ async def shutdown(_ctx: dict[str, Any]) -> None:
log.info("worker_shutdown")
async def _noop(_ctx: dict[str, Any]) -> None:
pass
class WorkerSettings:
functions: ClassVar[list[Any]] = [_noop] # populated as tasks are implemented
functions: ClassVar[list[Any]] = [scan_local_folder]
on_startup = startup
on_shutdown = shutdown
max_jobs = get_settings().max_parallel_downloads
+30
View File
@@ -0,0 +1,30 @@
"""Enqueue helper — submit a job to the arq queue from the request cycle.
A short-lived pool per call keeps things simple (enqueues are rare, admin-driven
actions). Redis being down degrades to a clean 503 rather than a crash
(graceful degradation)."""
from typing import Any
from arq import create_pool
from arq.connections import RedisSettings
from app.core.config import get_settings
from app.domain.errors import DependencyUnavailableError
async def enqueue(function: str, **kwargs: Any) -> str:
    """Enqueue ``function`` by name on the arq queue and return the job id.

    A pool is opened for this single call and always closed afterwards.

    Args:
        function: Name of the arq task function to enqueue.
        **kwargs: Keyword arguments forwarded to the task.

    Returns:
        The id of the enqueued job, as a string.

    Raises:
        DependencyUnavailableError: If the pool cannot be created, if
            submitting the job fails, or if ``enqueue_job`` yields no job.
    """
    settings = get_settings()
    try:
        pool = await create_pool(RedisSettings.from_dsn(str(settings.redis_url)))
    except Exception as exc:
        raise DependencyUnavailableError("Task queue (Redis) is unavailable.") from exc
    try:
        job = await pool.enqueue_job(function, **kwargs)
    except Exception as exc:
        # The queue can become unreachable between connecting and submitting;
        # translate that the same way as a failed connect so callers see one
        # error type instead of a raw client exception. The original cause
        # stays attached via ``from exc``.
        raise DependencyUnavailableError("Task queue (Redis) is unavailable.") from exc
    finally:
        # Release the short-lived pool whether or not the submit succeeded.
        await pool.aclose()
    if job is None:
        # No job object came back, so there is no id to hand to the caller.
        raise DependencyUnavailableError("Could not enqueue job.")
    return str(job.job_id)
+1
View File
@@ -0,0 +1 @@
"""arq task functions. Registered in ``app.workers.arq_worker.WorkerSettings``."""
+46
View File
@@ -0,0 +1,46 @@
"""arq task: scan an indexable source and import its files into the library.
Heavy work (directory walk + file copies) belongs off the request cycle
(CLAUDE.md). The HTTP endpoint enqueues this; the worker runs it with its own
transactional session.
"""
import uuid
from typing import Any
from app.application.import_service import LibraryImportService
from app.core.config import get_settings
from app.core.logging import get_logger
from app.infrastructure.db import session_scope
from app.infrastructure.db.repositories import (
SqlAlchemyArtistRepository,
SqlAlchemyTrackRepository,
)
from app.infrastructure.sources.registry import build_source_registry
from app.infrastructure.storage.provider import get_file_storage
log = get_logger("worker.import")
async def scan_local_folder(
    _ctx: dict[str, Any], *, source: str = "local", added_by: str | None = None
) -> dict[str, Any]:
    """Scan an indexable source and import its files into the library.

    Args:
        _ctx: arq job context (unused).
        source: Name used to look up the indexable backend in the registry.
        added_by: String form of a UUID identifying the acting user; when
            missing or empty, no actor is passed to the import service.

    Returns:
        A plain dict with the ``source``, ``seen``, ``imported``, ``skipped``
        and ``failed`` values taken from the import summary.
    """
    backend = build_source_registry(get_settings()).indexable(source)

    # Parse the actor only when one was supplied; falsy values mean "nobody".
    if added_by:
        actor = uuid.UUID(added_by)
    else:
        actor = None

    async with session_scope() as session:
        importer = LibraryImportService(
            tracks=SqlAlchemyTrackRepository(session),
            artists=SqlAlchemyArtistRepository(session),
            storage=get_file_storage(),
        )
        summary = await importer.scan_and_import(backend, added_by=actor)

    # Flatten the summary object into a plain dict, keeping the key order.
    reported_fields = ("source", "seen", "imported", "skipped", "failed")
    return {field: getattr(summary, field) for field in reported_fields}