Files
mcma-backend/app/infrastructure/db/repositories/album_repository.py
T
Senko-san c72d19599a
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled
Docker Build & Publish / build (push) Failing after 10m8s
feat(enrichment): tag-first metadata pipeline (§1D)
Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint
fingerprint → AcoustID lookup. Well-tagged files get correct
artist/album/title offline; the rest are identified via AcoustID
(which also yields a MusicBrainz recording id in one call).

- domain: AudioTags/Fingerprint/RecordingMatch value objects; ports
  AudioTagReader, AudioFingerprinter, AcoustIdClient; TrackRepository
  .apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create
- infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter,
  AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled)
- application: MetadataEnrichmentService — tags preferred, AcoustID fills
  gaps; resolves artist/album; status enriched/failed; skips manual;
  every external step wrapped (graceful degradation)
- workers: enrich_task registered; enqueue_enrich is best-effort and
  deferred so the caller's txn commits before the worker reads the row
- wiring: upload enqueues after add; import returns imported_ids and
  enqueues post-commit (mid-scan would race the worker); manual
  POST /tracks/{id}/metadata/enrich endpoint
- deps: add mutagen (fpcalc/ffmpeg already in the image)

Tests: metadata service orchestration, AcoustID parser, tag helpers.
125 passed; mypy strict + ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 13:04:02 +03:00

132 lines
4.5 KiB
Python

"""Album repository — adapter over ``AsyncSession``."""
import uuid
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.entities.album import Album
from app.infrastructure.db.models.album import AlbumModel
from app.infrastructure.db.models.track import TrackModel
def _to_entity(row: AlbumModel) -> Album:
    """Map an ``AlbumModel`` ORM row onto the domain ``Album`` entity.

    Every field is copied across one-to-one; no value is transformed.
    """
    album = Album(
        # identity and ownership
        id=row.id,
        artist_id=row.artist_id,
        # descriptive metadata
        title=row.title,
        year=row.year,
        cover_path=row.cover_path,
        musicbrainz_id=row.musicbrainz_id,
        # audit timestamps
        created_at=row.created_at,
        updated_at=row.updated_at,
    )
    return album
class SqlAlchemyAlbumRepository:
    """Album repository backed by a SQLAlchemy ``AsyncSession``.

    Every query runs on the session given to the constructor. The class
    flushes when it needs database-generated values but never commits;
    presumably the caller owns the transaction boundary (not visible here).
    """

    # Escape character passed to ``ILIKE ... ESCAPE`` for title searches.
    _LIKE_ESCAPE = "\\"

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    @classmethod
    def _like_pattern(cls, q: str) -> str:
        """Return a ``%...%`` substring pattern that matches ``q`` literally.

        ``%`` and ``_`` are LIKE wildcards; left unescaped, a search for
        ``"100%"`` or ``"_"`` would match unrelated (or all) titles. The
        escape character itself is doubled first so it cannot swallow the
        escapes added afterwards.
        """
        esc = cls._LIKE_ESCAPE
        literal = (
            q.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )
        return f"%{literal}%"

    async def get_or_create(
        self,
        *,
        title: str,
        artist_id: uuid.UUID,
        year: int | None,
        musicbrainz_id: str | None,
    ) -> Album:
        """Resolve an album by ``(title, artist_id)``, creating it if absent.

        Backfills ``year``/``musicbrainz_id`` onto an existing row when it
        lacks them and enrichment now has values (gap-fill, never overwrite).

        NOTE(review): ``scalar_one_or_none`` raises when several rows match,
        so this relies on ``(title, artist_id)`` being unique — confirm a
        constraint exists on the table.
        """
        lookup = select(AlbumModel).where(
            AlbumModel.title == title,
            AlbumModel.artist_id == artist_id,
        )
        row = (await self._session.execute(lookup)).scalar_one_or_none()
        if row is None:
            row = AlbumModel(
                title=title,
                artist_id=artist_id,
                year=year,
                musicbrainz_id=musicbrainz_id,
            )
            self._session.add(row)
        else:
            # Gap-fill only: a value already stored is never replaced.
            if row.year is None and year is not None:
                row.year = year
            if row.musicbrainz_id is None and musicbrainz_id is not None:
                row.musicbrainz_id = musicbrainz_id
        # Flush the pending insert/update, then reload the row so the entity
        # is built from the values the database now holds.
        await self._session.flush()
        await self._session.refresh(row)
        return _to_entity(row)

    async def get_by_id(self, album_id: uuid.UUID) -> Album | None:
        """Return the album with primary key ``album_id``, or ``None``."""
        row = await self._session.get(AlbumModel, album_id)
        return _to_entity(row) if row is not None else None

    async def get_many(self, ids: list[uuid.UUID]) -> list[Album]:
        """Return the albums whose ids are in ``ids``.

        An empty ``ids`` returns ``[]`` without querying. No ordering is
        applied, and ids with no matching row are simply absent.
        """
        if not ids:
            return []
        stmt = select(AlbumModel).where(AlbumModel.id.in_(ids))
        rows = (await self._session.execute(stmt)).scalars().all()
        return [_to_entity(r) for r in rows]

    async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int:
        """Count albums, optionally filtered by artist and title substring.

        ``q`` is matched case-insensitively and literally: wildcard
        characters in it are escaped. A falsy ``q`` applies no title filter.
        """
        stmt = select(func.count()).select_from(AlbumModel)
        if artist_id is not None:
            stmt = stmt.where(AlbumModel.artist_id == artist_id)
        if q:
            stmt = stmt.where(
                AlbumModel.title.ilike(self._like_pattern(q), escape=self._LIKE_ESCAPE)
            )
        return (await self._session.execute(stmt)).scalar_one()

    async def track_count(self, album_id: uuid.UUID) -> int:
        """Return how many tracks reference ``album_id``."""
        stmt = (
            select(func.count())
            .select_from(TrackModel)
            .where(TrackModel.album_id == album_id)
        )
        return (await self._session.execute(stmt)).scalar_one()

    async def track_count_many(self, album_ids: list[uuid.UUID]) -> dict[uuid.UUID, int]:
        """Return ``{album_id: track_count}`` for the given albums.

        An empty ``album_ids`` returns ``{}`` without querying. Albums with
        no tracks produce no group and are therefore missing from the result;
        callers should treat a missing key as zero.
        """
        if not album_ids:
            return {}
        stmt = (
            select(TrackModel.album_id, func.count(TrackModel.id).label("cnt"))
            .where(TrackModel.album_id.in_(album_ids))
            .group_by(TrackModel.album_id)
        )
        rows = (await self._session.execute(stmt)).all()
        return {row.album_id: row.cnt for row in rows}

    # ``list`` must stay the LAST method: once it is defined, the name
    # ``list`` in this class body refers to the method, which would break any
    # later ``list[...]`` annotation (builtin name shadowing).
    async def list(
        self,
        *,
        artist_id: uuid.UUID | None,
        q: str | None,
        limit: int,
        offset: int,
        sort_by: str = "title",
        order: str = "asc",
    ) -> list[Album]:
        """Return one page of albums, optionally filtered and sorted.

        Filters are the same as in ``count``: exact ``artist_id`` and a
        case-insensitive, literally matched title substring ``q``.

        Ordering: ``order == "random"`` sorts by ``random()``. Otherwise
        ``sort_by == "created"`` sorts by ``created_at`` and any other value
        by ``title``; ``order == "desc"`` sorts descending and any other
        value ascending.
        """
        stmt = select(AlbumModel)
        if artist_id is not None:
            stmt = stmt.where(AlbumModel.artist_id == artist_id)
        if q:
            stmt = stmt.where(
                AlbumModel.title.ilike(self._like_pattern(q), escape=self._LIKE_ESCAPE)
            )
        if order == "random":
            stmt = stmt.order_by(func.random())
        else:
            col = AlbumModel.created_at if sort_by == "created" else AlbumModel.title
            stmt = stmt.order_by(col.desc() if order == "desc" else col.asc())
        stmt = stmt.limit(limit).offset(offset)
        rows = (await self._session.execute(stmt)).scalars().all()
        return [_to_entity(r) for r in rows]