Files
mcma-backend/app/application/upload_service.py
T
Senko-san c72d19599a
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled
Docker Build & Publish / build (push) Failing after 10m8s
feat(enrichment): tag-first metadata pipeline (§1D)
Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint
fingerprint → AcoustID lookup. Well-tagged files get correct
artist/album/title offline; the rest are identified via AcoustID
(which also yields a MusicBrainz recording id in one call).

- domain: AudioTags/Fingerprint/RecordingMatch value objects; ports
  AudioTagReader, AudioFingerprinter, AcoustIdClient; TrackRepository
  .apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create
- infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter,
  AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled)
- application: MetadataEnrichmentService — tags preferred, AcoustID fills
  gaps; resolves artist/album; status enriched/failed; skips manual;
  every external step wrapped (graceful degradation)
- workers: enrich_task registered; enqueue_enrich is best-effort and
  deferred so the caller's txn commits before the worker reads the row
- wiring: upload enqueues after add; import returns imported_ids and
  enqueues post-commit (mid-scan would race the worker); manual
  POST /tracks/{id}/metadata/enrich endpoint
- deps: add mutagen (fpcalc/ffmpeg already in the image)

Tests: metadata service orchestration, AcoustID parser, tag helpers.
125 passed; mypy strict + ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 13:04:02 +03:00

123 lines
3.5 KiB
Python

"""UploadService — handles user file uploads."""
import contextlib
import hashlib
import os
import tempfile
import uuid
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol
import anyio
from app.domain.entities.user import User
from app.domain.ports import ArtistRepository, FileStorage, TrackRepository
# Async callback that takes a track id and returns nothing. UploadService
# awaits it with the id of each newly added track (never for duplicates).
EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
class UploadFileProtocol(Protocol):
filename: str | None
async def read(self, size: int = -1) -> bytes: ...
@dataclass(frozen=True)
class UploadResult:
    """Immutable outcome of ``UploadService.handle_upload``.

    Attributes:
        track_id: Id of the track that now represents the uploaded file.
        title: Title of that track.
        already_exists: ``True`` when an earlier upload with the same
            content hash was found and returned instead of a new track.
    """

    track_id: uuid.UUID
    title: str
    already_exists: bool
async def _stream_to_temp(upload: UploadFileProtocol, dest: Path) -> tuple[str, int]:
    """Copy *upload* into *dest* chunk by chunk, hashing the bytes on the way.

    Args:
        upload: Source object; read in 64 KiB requests until it returns
            an empty ``bytes``.
        dest: Path of the file to (over)write with the uploaded content.

    Returns:
        ``(sha256_hex, size)`` — the hex SHA-256 digest of everything
        written and the total number of bytes written.
    """
    digest = hashlib.sha256()
    total = 0
    async with await anyio.open_file(dest, "wb") as out:
        # An empty read signals end of stream and terminates the loop.
        while chunk := await upload.read(65536):
            digest.update(chunk)
            await out.write(chunk)
            total += len(chunk)
    return digest.hexdigest(), total
class UploadService:
    """Application service that ingests a user-uploaded file as a track.

    The upload is streamed to a temporary file while its SHA-256 is
    computed. The hash is used as the ``source_id`` of ``"upload"``
    tracks, so re-uploading identical bytes returns the existing track
    instead of storing a second copy.
    """

    def __init__(
        self,
        tracks: TrackRepository,
        artists: ArtistRepository,
        storage: FileStorage,
        tmp_dir: Path | None = None,
        enqueue_enrich: EnrichEnqueuer | None = None,
    ) -> None:
        """Wire the service to its collaborators.

        Args:
            tracks: Repository used to look up and add tracks.
            artists: Repository used to resolve the placeholder artist.
            storage: Storage backend that receives the uploaded file.
            tmp_dir: Directory for temporary files; ``None`` lets
                ``tempfile`` pick its default location.
            enqueue_enrich: Optional async callback awaited with the id
                of each newly added track; ``None`` disables it.
        """
        self._tracks = tracks
        self._artists = artists
        self._storage = storage
        self._tmp_dir = tmp_dir
        self._enqueue_enrich = enqueue_enrich

    async def handle_upload(
        self,
        *,
        upload: UploadFileProtocol,
        user: User,
    ) -> UploadResult:
        """Store *upload* and register it as a track owned by *user*.

        Args:
            upload: The uploaded file; its ``filename`` supplies the
                provisional title and the file extension.
            user: The uploading user; recorded as ``added_by``.

        Returns:
            An ``UploadResult``. ``already_exists`` is ``True`` when a
            track with the same content hash was already present, in
            which case nothing new is stored.

        Raises:
            Whatever the storage backend, the repositories or the
            enqueue callback raise; a failure while saving the file or
            adding the track first attempts to delete the stored object.
        """
        filename = upload.filename or "unknown"
        # Both values come from the last path component of the name.
        ext = Path(filename).suffix.lower().lstrip(".") or "bin"
        title = Path(filename).stem or "Unknown"

        fd, tmp_str = tempfile.mkstemp(
            suffix=f".{ext}",
            dir=str(self._tmp_dir) if self._tmp_dir else None,
        )
        tmp_path = Path(tmp_str)
        try:
            # Only the path is needed; the file is reopened for writing
            # by _stream_to_temp.
            os.close(fd)
            sha256_hex, file_size = await _stream_to_temp(upload, tmp_path)

            existing = await self._tracks.get_by_source("upload", sha256_hex)
            if existing is not None:
                return UploadResult(
                    track_id=existing.id,
                    title=existing.title,
                    already_exists=True,
                )

            track_id = uuid.uuid4()
            key = f"tracks/{str(track_id)[:2]}/{track_id}.{ext}"
            try:
                # save_file is inside the rollback scope so a partially
                # written object is removed too. The key embeds a fresh
                # UUID, so deleting it cannot hit another track's file.
                await self._storage.save_file(key, tmp_path)
                artist = await self._artists.get_or_create("Unknown Artist")
                track = await self._tracks.add(
                    id=track_id,
                    title=title,
                    artist_id=artist.id,
                    storage_uri=key,
                    file_format=ext,
                    file_size=file_size,
                    source="upload",
                    source_id=sha256_hex,
                    metadata_status="pending",
                    added_by=user.id,
                )
            except BaseException:
                # BaseException so cancellation also triggers the
                # rollback; shielded so the delete itself can complete
                # inside an already-cancelled scope. Delete errors are
                # suppressed to keep the original failure visible.
                with anyio.CancelScope(shield=True), contextlib.suppress(Exception):
                    await self._storage.delete(key)
                raise

            if self._enqueue_enrich is not None:
                await self._enqueue_enrich(track.id)

            return UploadResult(
                track_id=track.id,
                title=track.title,
                already_exists=False,
            )
        finally:
            # Shielded so the temporary file is removed even when the
            # surrounding scope has been cancelled.
            with anyio.CancelScope(shield=True):
                await anyio.Path(tmp_path).unlink(missing_ok=True)