Files
mcma-backend/app/infrastructure/db/repositories/track_repository.py
T
Senko-san c72d19599a
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled
Docker Build & Publish / build (push) Failing after 10m8s
feat(enrichment): tag-first metadata pipeline (§1D)
Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint
fingerprint → AcoustID lookup. Well-tagged files get correct
artist/album/title offline; the rest are identified via AcoustID
(which also yields a MusicBrainz recording id in one call).

- domain: AudioTags/Fingerprint/RecordingMatch value objects; ports
  AudioTagReader, AudioFingerprinter, AcoustIdClient; TrackRepository
  .apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create
- infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter,
  AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled)
- application: MetadataEnrichmentService — tags preferred, AcoustID fills
  gaps; resolves artist/album; status enriched/failed; skips manual;
  every external step wrapped (graceful degradation)
- workers: enrich_task registered; enqueue_enrich is best-effort and
  deferred so the caller's txn commits before the worker reads the row
- wiring: upload enqueues after add; import returns imported_ids and
  enqueues post-commit (mid-scan would race the worker); manual
  POST /tracks/{id}/metadata/enrich endpoint
- deps: add mutagen (fpcalc/ffmpeg already in the image)

Tests: metadata service orchestration, AcoustID parser, tag helpers.
125 passed; mypy strict + ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 13:04:02 +03:00

220 lines
7.3 KiB
Python

"""Track repository — adapter over ``AsyncSession``."""
import uuid
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.entities.track import Track
from app.domain.errors import NotFoundError
from app.infrastructure.db.models.artist import ArtistModel
from app.infrastructure.db.models.track import TrackModel
def _to_entity(row: TrackModel) -> Track:
    """Build the ``Track`` domain entity for one ``TrackModel`` ORM row.

    Every field is copied across unchanged; nothing is computed or defaulted.
    """
    entity = Track(
        # Identity and relations.
        id=row.id,
        artist_id=row.artist_id,
        album_id=row.album_id,
        # Where the file lives and where it came from.
        storage_uri=row.storage_uri,
        file_format=row.file_format,
        file_size=row.file_size,
        source=row.source,
        source_id=row.source_id,
        # Descriptive metadata.
        title=row.title,
        duration_seconds=row.duration_seconds,
        genre=row.genre,
        year=row.year,
        metadata_status=row.metadata_status,
        # Audit timestamps.
        created_at=row.created_at,
        updated_at=row.updated_at,
    )
    return entity
class SqlAlchemyTrackRepository:
    """Track persistence on top of a SQLAlchemy ``AsyncSession``.

    Write methods flush the session but never commit; committing is left to
    the caller. Rows are converted with ``_to_entity`` before being returned,
    so ORM models do not leave this class.
    """

    # Escape character for LIKE patterns built from user input. "/" avoids the
    # backslash-quoting differences between SQL dialects.
    _LIKE_ESCAPE = "/"

    def __init__(self, session: AsyncSession) -> None:
        """Bind the repository to *session*; every query runs through it."""
        self._session = session

    @classmethod
    def _like_pattern(cls, q: str) -> str:
        """Return a ``%...%`` pattern that matches *q* as a literal substring.

        ``%`` and ``_`` are LIKE wildcards, so they (and the escape character
        itself) are prefixed with the escape character. The result must be
        used together with ``escape=cls._LIKE_ESCAPE``.
        """
        esc = cls._LIKE_ESCAPE
        # Escape the escape character first so the prefixes added for the
        # wildcards below are not doubled.
        literal = q.replace(esc, esc + esc)
        literal = literal.replace("%", esc + "%").replace("_", esc + "_")
        return f"%{literal}%"

    async def get_by_id(self, track_id: uuid.UUID) -> Track | None:
        """Return the track with primary key *track_id*, or ``None``."""
        row = await self._session.get(TrackModel, track_id)
        return _to_entity(row) if row is not None else None

    async def get_by_source(self, source: str, source_id: str) -> Track | None:
        """Return the track matching ``(source, source_id)``, or ``None``.

        ``scalar_one_or_none`` raises if more than one row matches.
        """
        row = (
            await self._session.execute(
                select(TrackModel).where(
                    TrackModel.source == source,
                    TrackModel.source_id == source_id,
                )
            )
        ).scalar_one_or_none()
        return _to_entity(row) if row is not None else None

    async def add(
        self,
        *,
        id: uuid.UUID,
        title: str,
        artist_id: uuid.UUID,
        storage_uri: str,
        file_format: str,
        file_size: int,
        source: str,
        source_id: str,
        metadata_status: str,
        added_by: uuid.UUID | None,
    ) -> Track:
        """Insert a new track row and return it as an entity.

        The row is flushed and then refreshed before conversion, so the
        returned entity reflects what the database holds after the insert.
        """
        row = TrackModel(
            id=id,
            title=title,
            artist_id=artist_id,
            storage_uri=storage_uri,
            file_format=file_format,
            file_size=file_size,
            source=source,
            source_id=source_id,
            metadata_status=metadata_status,
            added_by=added_by,
        )
        self._session.add(row)
        await self._session.flush()
        await self._session.refresh(row)
        return _to_entity(row)

    async def delete(self, track_id: uuid.UUID) -> None:
        """Delete the track with *track_id*; a missing track is a no-op."""
        row = await self._session.get(TrackModel, track_id)
        if row is not None:
            await self._session.delete(row)
            await self._session.flush()

    async def genres(self) -> list[tuple[str, int]]:
        """Distinct non-null genres with their song counts, most common first.

        Genres with equal counts are ordered by name so the result is
        deterministic.

        Defined before ``list`` — the method named ``list`` shadows the builtin
        in later annotations within the class body.
        """
        track_count = func.count(TrackModel.id)
        rows = (
            await self._session.execute(
                select(TrackModel.genre, track_count.label("cnt"))
                .where(TrackModel.genre.is_not(None))
                .group_by(TrackModel.genre)
                .order_by(track_count.desc(), TrackModel.genre.asc())
            )
        ).all()
        return [(row.genre, row.cnt) for row in rows]

    async def list(
        self,
        *,
        artist_id: uuid.UUID | None,
        album_id: uuid.UUID | None,
        q: str | None,
        sort_by: str = "created_at",
        order: str = "desc",
        limit: int = 50,
        offset: int = 0,
    ) -> list[Track]:
        """Return one page of tracks, optionally filtered and sorted.

        Args:
            artist_id: Keep only tracks of this artist when not ``None``.
            album_id: Keep only tracks of this album when not ``None``.
            q: Case-insensitive literal substring to look for in the title;
                ``None`` or ``""`` disables the filter.
            sort_by: ``"artist"`` (artist name) or ``"title"``; any other
                value sorts by ``created_at``.
            order: ``"asc"`` for ascending; any other value is descending.
            limit: Maximum number of rows to return.
            offset: Number of rows to skip.
        """
        stmt = select(TrackModel)
        if artist_id is not None:
            stmt = stmt.where(TrackModel.artist_id == artist_id)
        if album_id is not None:
            stmt = stmt.where(TrackModel.album_id == album_id)
        if q:
            stmt = stmt.where(
                TrackModel.title.ilike(self._like_pattern(q), escape=self._LIKE_ESCAPE)
            )

        ascending = order == "asc"
        # The sort columns are not unique. Without a unique secondary key the
        # order of tied rows is undefined, and LIMIT/OFFSET paging could
        # repeat or skip tracks between pages.
        tie_breaker = TrackModel.id.asc() if ascending else TrackModel.id.desc()
        if sort_by == "artist":
            stmt = stmt.join(ArtistModel, TrackModel.artist_id == ArtistModel.id)
            col_artist = ArtistModel.name
            stmt = stmt.order_by(
                col_artist.asc() if ascending else col_artist.desc(), tie_breaker
            )
        elif sort_by == "title":
            col_title = TrackModel.title
            stmt = stmt.order_by(
                col_title.asc() if ascending else col_title.desc(), tie_breaker
            )
        else:
            col_created = TrackModel.created_at
            stmt = stmt.order_by(
                col_created.asc() if ascending else col_created.desc(), tie_breaker
            )

        stmt = stmt.limit(limit).offset(offset)
        rows = (await self._session.execute(stmt)).scalars().all()
        return [_to_entity(r) for r in rows]

    async def count(
        self,
        *,
        artist_id: uuid.UUID | None,
        album_id: uuid.UUID | None,
        q: str | None,
    ) -> int:
        """Count the tracks matching the same filters that ``list`` applies."""
        stmt = select(func.count()).select_from(TrackModel)
        if artist_id is not None:
            stmt = stmt.where(TrackModel.artist_id == artist_id)
        if album_id is not None:
            stmt = stmt.where(TrackModel.album_id == album_id)
        if q:
            stmt = stmt.where(
                TrackModel.title.ilike(self._like_pattern(q), escape=self._LIKE_ESCAPE)
            )
        return (await self._session.execute(stmt)).scalar_one()

    async def update(
        self,
        track_id: uuid.UUID,
        *,
        title: str | None,
        genre: str | None,
        year: int | None,
    ) -> Track:
        """Apply a manual edit to a track and mark it as ``"manual"``.

        A ``None`` argument leaves the corresponding column untouched, so this
        method cannot clear a value.

        Raises:
            NotFoundError: No track with *track_id* exists.
        """
        row = await self._session.get(TrackModel, track_id)
        if row is None:
            raise NotFoundError(f"Track {track_id} not found.")
        if title is not None:
            row.title = title
        if genre is not None:
            row.genre = genre
        if year is not None:
            row.year = year
        row.metadata_status = "manual"
        await self._session.flush()
        await self._session.refresh(row)
        return _to_entity(row)

    async def apply_enrichment(
        self,
        track_id: uuid.UUID,
        *,
        title: str,
        artist_id: uuid.UUID,
        album_id: uuid.UUID | None,
        genre: str | None,
        year: int | None,
        track_number: int | None,
        duration_seconds: int | None,
        bitrate: int | None,
        acoustid_fingerprint: str | None,
        musicbrainz_id: str | None,
        metadata_status: str,
    ) -> Track:
        """Store the outcome of an enrichment run on a track.

        ``title``, ``artist_id`` and ``metadata_status`` are always written.
        For every other argument ``None`` means "no new value": the stored
        value is kept. A non-``None`` value replaces whatever is stored.

        Raises:
            NotFoundError: No track with *track_id* exists.
        """
        row = await self._session.get(TrackModel, track_id)
        if row is None:
            raise NotFoundError(f"Track {track_id} not found.")

        # Identity + status are authoritative for an enrichment run.
        row.title = title
        row.artist_id = artist_id
        row.metadata_status = metadata_status

        # Nullable extras: a missing (None) value never erases data that a
        # prior run found; a present value overwrites it.
        if album_id is not None:
            row.album_id = album_id
        if genre is not None:
            row.genre = genre
        if year is not None:
            row.year = year
        if track_number is not None:
            row.track_number = track_number
        if duration_seconds is not None:
            row.duration_seconds = duration_seconds
        if bitrate is not None:
            row.bitrate = bitrate
        if acoustid_fingerprint is not None:
            row.acoustid_fingerprint = acoustid_fingerprint
        if musicbrainz_id is not None:
            row.musicbrainz_id = musicbrainz_id

        await self._session.flush()
        await self._session.refresh(row)
        return _to_entity(row)