Files
mcma-backend/app/infrastructure/db/repositories/track_repository.py
T
Senko-san 73d7da440f
Docker Build & Publish / build (push) Has been cancelled
Docker Build & Publish / push (push) Has been cancelled
Docker Build & Publish / Prune old image versions (push) Has been cancelled
feat(enrichment): record status/errors and trust high-confidence AcoustID
Two related gaps surfaced from "uploaded a track, nothing changed / no status":

- A track could stay stuck on `pending` forever (an unexpected worker error
  rolled back the run without recording anything), and `failed` carried no
  reason. Add `tracks.metadata_error` + `tracks.enriched_at` (migration), stamp
  the outcome in apply_enrichment, add TrackRepository.mark_enrichment_failed,
  wrap enrich_task to persist crashes as `failed` in a fresh session, and emit a
  human-readable no-match reason. Expose metadata_error/enriched_at in TrackOut.

- The tag-first merge let junk embedded tags (e.g. "Music Track"/"Sound_13958")
  override even a 0.99-confidence AcoustID match. Add acoustid_trust_score
  (default 0.85): above it the acoustic identity wins for title/artist/album/
  year, tags are fallback; below it, tag-first as before.

Add a license-free real-file fixture (Scarlet Fire / Otis McDonald) whose junk
tags AcoustID overrides, with an always-on tag-reader test plus fpcalc/AcoustID/
network-gated identity + full-pipeline tests (skip on host, run in the container).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-13 13:29:08 +03:00

241 lines
8.3 KiB
Python

"""Track repository — adapter over ``AsyncSession``."""
import datetime as dt
import uuid
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.domain.entities.track import Track
from app.domain.errors import NotFoundError
from app.infrastructure.db.models.artist import ArtistModel
from app.infrastructure.db.models.track import TrackModel
def _to_entity(row: TrackModel) -> Track:
return Track(
id=row.id,
title=row.title,
artist_id=row.artist_id,
album_id=row.album_id,
storage_uri=row.storage_uri,
file_format=row.file_format,
file_size=row.file_size,
source=row.source,
source_id=row.source_id,
duration_seconds=row.duration_seconds,
genre=row.genre,
year=row.year,
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
created_at=row.created_at,
updated_at=row.updated_at,
)
class SqlAlchemyTrackRepository:
def __init__(self, session: AsyncSession) -> None:
self._session = session
async def get_by_id(self, track_id: uuid.UUID) -> Track | None:
row = await self._session.get(TrackModel, track_id)
return _to_entity(row) if row is not None else None
async def get_by_source(self, source: str, source_id: str) -> Track | None:
row = (
await self._session.execute(
select(TrackModel).where(
TrackModel.source == source,
TrackModel.source_id == source_id,
)
)
).scalar_one_or_none()
return _to_entity(row) if row is not None else None
async def add(
self,
*,
id: uuid.UUID,
title: str,
artist_id: uuid.UUID,
storage_uri: str,
file_format: str,
file_size: int,
source: str,
source_id: str,
metadata_status: str,
added_by: uuid.UUID | None,
) -> Track:
row = TrackModel(
id=id,
title=title,
artist_id=artist_id,
storage_uri=storage_uri,
file_format=file_format,
file_size=file_size,
source=source,
source_id=source_id,
metadata_status=metadata_status,
added_by=added_by,
)
self._session.add(row)
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def delete(self, track_id: uuid.UUID) -> None:
row = await self._session.get(TrackModel, track_id)
if row is not None:
await self._session.delete(row)
await self._session.flush()
async def genres(self) -> list[tuple[str, int]]:
"""Distinct non-null genres with their song counts, most common first.
Defined before ``list`` — the method named ``list`` shadows the builtin
in later annotations within the class body."""
rows = (
await self._session.execute(
select(TrackModel.genre, func.count(TrackModel.id).label("cnt"))
.where(TrackModel.genre.is_not(None))
.group_by(TrackModel.genre)
.order_by(func.count(TrackModel.id).desc())
)
).all()
return [(row.genre, row.cnt) for row in rows]
async def list(
self,
*,
artist_id: uuid.UUID | None,
album_id: uuid.UUID | None,
q: str | None,
sort_by: str = "created_at",
order: str = "desc",
limit: int = 50,
offset: int = 0,
) -> list[Track]:
stmt = select(TrackModel)
if artist_id is not None:
stmt = stmt.where(TrackModel.artist_id == artist_id)
if album_id is not None:
stmt = stmt.where(TrackModel.album_id == album_id)
if q:
stmt = stmt.where(TrackModel.title.ilike(f"%{q}%"))
if sort_by == "artist":
stmt = stmt.join(ArtistModel, TrackModel.artist_id == ArtistModel.id)
col_artist = ArtistModel.name
stmt = stmt.order_by(col_artist.asc() if order == "asc" else col_artist.desc())
elif sort_by == "title":
col_title = TrackModel.title
stmt = stmt.order_by(col_title.asc() if order == "asc" else col_title.desc())
else:
stmt = stmt.order_by(
TrackModel.created_at.asc() if order == "asc" else TrackModel.created_at.desc()
)
stmt = stmt.limit(limit).offset(offset)
rows = (await self._session.execute(stmt)).scalars().all()
return [_to_entity(r) for r in rows]
async def count(
self,
*,
artist_id: uuid.UUID | None,
album_id: uuid.UUID | None,
q: str | None,
) -> int:
stmt = select(func.count()).select_from(TrackModel)
if artist_id is not None:
stmt = stmt.where(TrackModel.artist_id == artist_id)
if album_id is not None:
stmt = stmt.where(TrackModel.album_id == album_id)
if q:
stmt = stmt.where(TrackModel.title.ilike(f"%{q}%"))
return (await self._session.execute(stmt)).scalar_one()
async def update(
self,
track_id: uuid.UUID,
*,
title: str | None,
genre: str | None,
year: int | None,
) -> Track:
row = await self._session.get(TrackModel, track_id)
if row is None:
raise NotFoundError(f"Track {track_id} not found.")
if title is not None:
row.title = title
if genre is not None:
row.genre = genre
if year is not None:
row.year = year
row.metadata_status = "manual"
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def apply_enrichment(
self,
track_id: uuid.UUID,
*,
title: str,
artist_id: uuid.UUID,
album_id: uuid.UUID | None,
genre: str | None,
year: int | None,
track_number: int | None,
duration_seconds: int | None,
bitrate: int | None,
acoustid_fingerprint: str | None,
musicbrainz_id: str | None,
metadata_status: str,
metadata_error: str | None = None,
) -> Track:
row = await self._session.get(TrackModel, track_id)
if row is None:
raise NotFoundError(f"Track {track_id} not found.")
# Identity + status are authoritative for an enrichment run.
row.title = title
row.artist_id = artist_id
row.metadata_status = metadata_status
# A finished run always stamps outcome: clear/set the reason and mark the
# completion time so the UI can tell "still pending" from "done/failed".
row.metadata_error = metadata_error
row.enriched_at = dt.datetime.now(dt.UTC)
# Nullable extras: fill gaps only — never erase data a prior run found.
if album_id is not None:
row.album_id = album_id
if genre is not None:
row.genre = genre
if year is not None:
row.year = year
if track_number is not None:
row.track_number = track_number
if duration_seconds is not None:
row.duration_seconds = duration_seconds
if bitrate is not None:
row.bitrate = bitrate
if acoustid_fingerprint is not None:
row.acoustid_fingerprint = acoustid_fingerprint
if musicbrainz_id is not None:
row.musicbrainz_id = musicbrainz_id
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def mark_enrichment_failed(self, track_id: uuid.UUID, *, error: str) -> None:
"""Record that an enrichment run crashed (unexpected exception). Runs in
its own session so the failure is persisted even though the run's own
transaction rolled back. Never overwrites ``manual`` (a no-op then), and
a missing track is a clean no-op."""
row = await self._session.get(TrackModel, track_id)
if row is None or row.metadata_status == "manual":
return
row.metadata_status = "failed"
row.metadata_error = error
row.enriched_at = dt.datetime.now(dt.UTC)
await self._session.flush()