feat(metadata): implement single-track metadata editor API (§A7/§1H)
Docker Build & Publish / Prune old image versions (push) Has been cancelled
Docker Build & Publish / build (push) Has been cancelled
Docker Build & Publish / push (push) Has been cancelled

Adds inline AcoustID match-finding via GET /tracks/{id}/metadata/matches
(multiple ranked candidates from lookup_all) and PUT /tracks/{id}/metadata
for manual edits, which resolves artist/album and sets
metadata_status=manual. Extends TrackOut with genre/year/track_number.
This commit is contained in:
Senko-san
2026-06-13 14:34:43 +03:00
parent 73d7da440f
commit 63c7d05eca
14 changed files with 438 additions and 16 deletions
+30
View File
@@ -15,6 +15,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from sqlalchemy.ext.asyncio import AsyncSession
from app.application.auth_service import AuthService
from app.application.metadata_service import MetadataEnrichmentService
from app.application.streaming_service import StreamingService
from app.application.subsonic_auth_service import SubsonicAuthService
from app.application.upload_service import UploadService
@@ -35,6 +36,9 @@ from app.infrastructure.db.repositories import (
SqlAlchemyTrackRepository,
SqlAlchemyUserRepository,
)
from app.infrastructure.metadata.acoustid import AcoustIdHttpClient
from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
from app.infrastructure.metadata.tags import MutagenTagReader
from app.infrastructure.sources.registry import SourceRegistry, build_source_registry
from app.infrastructure.storage.provider import get_file_storage
from app.workers.queue import enqueue_enrich
@@ -132,8 +136,34 @@ def get_streaming_service(session: SessionDep, storage: FileStorageDep) -> Strea
)
def get_metadata_service(
    session: SessionDep, storage: FileStorageDep
) -> MetadataEnrichmentService:
    """Assemble a ``MetadataEnrichmentService`` for inline, read-only use.

    Wires the §6.2 fingerprint/AcoustID adapters for the metadata editor's
    "find matches" action (§A7). The full pipeline (incl. cover art) stays in
    the worker (`tasks/enrich_task.py`).
    """
    settings = get_settings()

    # The API key is optional; without one the client is built with
    # ``api_key=None``.
    secret = settings.acoustid_api_key
    acoustid_client = AcoustIdHttpClient(
        api_key=secret.get_secret_value() if secret else None,
        user_agent=settings.musicbrainz_user_agent,
        api_url=settings.acoustid_api_url,
    )
    fingerprinter = FpcalcFingerprinter(settings.fpcalc_path)

    # All repositories share the request-scoped session.
    return MetadataEnrichmentService(
        tracks=SqlAlchemyTrackRepository(session),
        artists=SqlAlchemyArtistRepository(session),
        albums=SqlAlchemyAlbumRepository(session),
        storage=storage,
        tag_reader=MutagenTagReader(),
        fingerprinter=fingerprinter,
        acoustid=acoustid_client,
        acoustid_trust_score=settings.acoustid_trust_score,
    )
UploadServiceDep = Annotated[UploadService, Depends(get_upload_service)]
StreamingServiceDep = Annotated[StreamingService, Depends(get_streaming_service)]
MetadataServiceDep = Annotated[MetadataEnrichmentService, Depends(get_metadata_service)]
# -- library repository deps ---------------------------------------------------
+33
View File
@@ -16,6 +16,9 @@ class TrackOut(BaseModel):
duration_seconds: int | None
file_format: str
file_size: int
genre: str | None
year: int | None
track_number: int | None
metadata_status: str
metadata_error: str | None
enriched_at: dt.datetime | None
@@ -28,3 +31,33 @@ class TrackUpdate(BaseModel):
title: str | None = None
genre: str | None = None
year: int | None = None
class MetadataMatch(BaseModel):
    """One AcoustID candidate for the metadata editor's match picker (§A7)."""

    # Every field is required in the payload (no defaults); the ``| None``
    # ones may be null when AcoustID returned no value for them.

    # AcoustID result identifier.
    acoustid: str
    # Score reported by AcoustID for this candidate.
    score: float
    # Identifiers taken from the candidate's recording data
    # (presumably MusicBrainz ids — confirm against the AcoustID parser).
    recording_mbid: str | None
    release_group_mbid: str | None
    # Descriptive metadata the user can accept for the track.
    title: str | None
    artist: str | None
    album: str | None
    year: int | None
class MetadataMatchesOut(BaseModel):
    # Response envelope for ``GET /tracks/{id}/metadata/matches``; the list is
    # empty when no candidate could be produced.
    items: list[MetadataMatch]
class MetadataApply(BaseModel):
    """Manual edits / accepted match applied via ``PUT /tracks/{id}/metadata``.
    Sets ``metadata_status = manual`` (never overwritten by auto-enrichment)."""

    # Every field is optional and defaults to ``None``.
    # NOTE(review): ``None`` appears to mean "leave unchanged" rather than
    # "clear" — confirm against the repository's ``update``.
    title: str | None = None
    # Artist/album are given by name/title; the endpoint resolves them to ids.
    artist_name: str | None = None
    album_title: str | None = None
    year: int | None = None
    genre: str | None = None
    track_number: int | None = None
+88 -3
View File
@@ -12,11 +12,18 @@ from app.api.deps import (
ArtistRepoDep,
CurrentUser,
FileStorageDep,
MetadataServiceDep,
StreamUser,
TrackRepoDep,
)
from app.api.schemas.pagination import PagedResponse
from app.api.schemas.track import TrackOut, TrackUpdate
from app.api.schemas.track import (
MetadataApply,
MetadataMatch,
MetadataMatchesOut,
TrackOut,
TrackUpdate,
)
from app.domain.entities.album import Album
from app.domain.entities.track import Artist, Track
from app.domain.errors import NotFoundError
@@ -41,6 +48,9 @@ async def _build_track_out(
duration_seconds=t.duration_seconds,
file_format=t.file_format,
file_size=t.file_size,
genre=t.genre,
year=t.year,
track_number=t.track_number,
metadata_status=t.metadata_status,
metadata_error=t.metadata_error,
enriched_at=t.enriched_at,
@@ -187,8 +197,83 @@ async def enrich_metadata(
@router.get("/{track_id}/metadata/matches")
async def get_metadata_matches(track_id: uuid.UUID, _: CurrentUser) -> Any: ...
async def get_metadata_matches(
    track_id: uuid.UUID,
    track_repo: TrackRepoDep,
    metadata_service: MetadataServiceDep,
    _: CurrentUser,
) -> MetadataMatchesOut:
    """AcoustID candidates for the metadata editor's match picker (§A7).

    Runs the fingerprint lookup inline (single track, user-triggered) and
    never mutates the track. Degrades to an empty list if fpcalc/AcoustID are
    unavailable or no match is found.
    """
    # Existence is checked here so an unknown id raises NotFoundError; the
    # service itself would only return an empty list for it.
    if await track_repo.get_by_id(track_id) is None:
        raise NotFoundError(f"Track {track_id} not found.")

    candidates = await metadata_service.find_matches(track_id)

    # Translate each candidate field by field into its API schema.
    payload = [
        MetadataMatch(
            acoustid=candidate.acoustid,
            score=candidate.score,
            recording_mbid=candidate.recording_mbid,
            release_group_mbid=candidate.release_group_mbid,
            title=candidate.title,
            artist=candidate.artist,
            album=candidate.album,
            year=candidate.year,
        )
        for candidate in candidates
    ]
    return MetadataMatchesOut(items=payload)
@router.put("/{track_id}/metadata")
async def set_metadata(track_id: uuid.UUID, _: CurrentUser) -> Any: ...
async def set_metadata(
    track_id: uuid.UUID,
    body: MetadataApply,
    track_repo: TrackRepoDep,
    artist_repo: ArtistRepoDep,
    album_repo: AlbumRepoDep,
    _: CurrentUser,
) -> TrackOut:
    """Apply manual edits or an accepted AcoustID match (§A7). Sets
    ``metadata_status = manual`` — never overwritten by auto-enrichment."""
    existing = await track_repo.get_by_id(track_id)
    if existing is None:
        raise NotFoundError(f"Track {track_id} not found.")

    # Resolve the artist first: a newly named album is attached to it.
    resolved_artist_id: uuid.UUID | None = None
    if body.artist_name:
        resolved_artist_id = (await artist_repo.get_or_create(body.artist_name)).id

    # An album named without a new artist is attached to the track's current one.
    resolved_album_id: uuid.UUID | None = None
    if body.album_title:
        resolved_album = await album_repo.get_or_create(
            title=body.album_title,
            artist_id=resolved_artist_id or existing.artist_id,
            year=body.year,
            musicbrainz_id=None,
        )
        resolved_album_id = resolved_album.id

    updated = await track_repo.update(
        track_id,
        title=body.title,
        genre=body.genre,
        year=body.year,
        artist_id=resolved_artist_id,
        album_id=resolved_album_id,
        track_number=body.track_number,
    )

    # Load the artist/album referenced by the updated track so the response
    # can be built by the shared serializer.
    wanted_artist_ids = [updated.artist_id]
    wanted_album_ids = [updated.album_id] if updated.album_id else []
    artists_by_id = {
        artist.id: artist for artist in await artist_repo.get_many(wanted_artist_ids)
    }
    albums_by_id = {
        album.id: album for album in await album_repo.get_many(wanted_album_ids)
    }
    rendered = await _build_track_out([updated], artists_by_id, albums_by_id)
    return rendered[0]
+23
View File
@@ -162,6 +162,29 @@ class MetadataEnrichmentService:
return "No metadata match: AcoustID lookup is unavailable (no API key)."
return "No metadata match found in tags or AcoustID."
async def find_matches(self, track_id: uuid.UUID) -> list[RecordingMatch]:
    """AcoustID candidates for the metadata editor's match picker (§A7).

    Read-only — unlike :meth:`enrich`, never touches the track. Runs
    inline (single track, user-triggered) rather than via the worker.
    Degrades to ``[]`` whenever fingerprinting/AcoustID is unavailable or
    the file can't be read, same as the enrichment pipeline.

    Args:
        track_id: Id of the track whose audio file should be fingerprinted.

    Returns:
        The candidates returned by the AcoustID client's ``lookup_all``, or
        ``[]`` when the track is unknown, an adapter is unavailable, no
        fingerprint could be computed, or any step raised.
    """
    track = await self._tracks.get_by_id(track_id)
    if track is None:
        return []
    if not (self._acoustid.is_available() and self._fingerprinter.is_available()):
        return []
    try:
        # Hold the local copy only while fingerprinting; the network lookup
        # below does not need the file.
        async with self._storage.as_local_path(track.storage_uri) as path:
            fingerprint = await self._fingerprinter.calculate(path)
        if fingerprint is None:
            return []
        return await self._acoustid.lookup_all(fingerprint)
    except Exception:
        # Best-effort by design, but keep the traceback so storage/fpcalc
        # failures can be diagnosed from the logs.
        log.warning("find_matches_failed", track_id=str(track_id), exc_info=True)
        return []
async def _read_local(self, storage_uri: str) -> AudioTags | None:
try:
async with self._storage.as_local_path(storage_uri) as path:
+1
View File
@@ -27,6 +27,7 @@ class Track:
duration_seconds: int | None
genre: str | None
year: int | None
track_number: int | None
metadata_status: str
metadata_error: str | None
enriched_at: dt.datetime | None
+4 -1
View File
@@ -302,10 +302,13 @@ class AudioFingerprinter(Protocol):
class AcoustIdClient(Protocol):
    """AcoustID lookup port.

    - ``is_available`` is False without an API key (the whole fingerprint
      path is then skipped).
    - ``lookup`` returns the best match or ``None`` (no result / service
      down), never raising.
    - ``lookup_all`` returns the candidates ranked by confidence (``[]`` on
      no result / unavailable / error), for the metadata editor's match
      picker.
    """

    def is_available(self) -> bool: ...

    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: ...

    async def lookup_all(self, fingerprint: Fingerprint) -> list[RecordingMatch]: ...
class CoverArtExtractor(Protocol):
@@ -38,6 +38,7 @@ def _track_to_entity(row: TrackModel) -> Track:
duration_seconds=row.duration_seconds,
genre=row.genre,
year=row.year,
track_number=row.track_number,
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
@@ -37,6 +37,7 @@ def _track_to_entity(row: TrackModel) -> Track:
duration_seconds=row.duration_seconds,
genre=row.genre,
year=row.year,
track_number=row.track_number,
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
@@ -26,6 +26,7 @@ def _to_entity(row: TrackModel) -> Track:
duration_seconds=row.duration_seconds,
genre=row.genre,
year=row.year,
track_number=row.track_number,
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
@@ -162,6 +163,9 @@ class SqlAlchemyTrackRepository:
title: str | None,
genre: str | None,
year: int | None,
artist_id: uuid.UUID | None = None,
album_id: uuid.UUID | None = None,
track_number: int | None = None,
) -> Track:
row = await self._session.get(TrackModel, track_id)
if row is None:
@@ -172,6 +176,12 @@ class SqlAlchemyTrackRepository:
row.genre = genre
if year is not None:
row.year = year
if artist_id is not None:
row.artist_id = artist_id
if album_id is not None:
row.album_id = album_id
if track_number is not None:
row.track_number = track_number
row.metadata_status = "manual"
await self._session.flush()
await self._session.refresh(row)
+39 -12
View File
@@ -46,6 +46,18 @@ class AcoustIdHttpClient:
return bool(self._api_key)
async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
    """Return the best match for *fingerprint*, or ``None`` when the raw
    lookup produced no payload."""
    raw = await self._lookup_raw(fingerprint)
    return None if raw is None else _parse_best_match(raw)
async def lookup_all(self, fingerprint: Fingerprint) -> list[RecordingMatch]:
    """Return every parsed candidate for *fingerprint*; ``[]`` when the raw
    lookup produced no payload."""
    raw = await self._lookup_raw(fingerprint)
    return [] if raw is None else _parse_matches(raw)
async def _lookup_raw(self, fingerprint: Fingerprint) -> object | None:
if not self._api_key:
return None
try:
@@ -65,13 +77,11 @@ class AcoustIdHttpClient:
},
)
resp.raise_for_status()
payload = resp.json()
return resp.json() # type: ignore[no-any-return]
except httpx.HTTPError, ValueError:
log.warning("acoustid_lookup_failed")
return None
return _parse_best_match(payload)
@classmethod
async def _throttle(cls) -> None:
async with cls._throttle_lock:
@@ -82,22 +92,39 @@ class AcoustIdHttpClient:
cls._last_call_monotonic = time.monotonic()
_MAX_MATCHES = 5
def _parse_best_match(payload: object) -> RecordingMatch | None:
    """Return the first candidate parsed from *payload*, or ``None`` if there
    is none."""
    ranked = _parse_matches(payload)
    if not ranked:
        return None
    return ranked[0]
def _score_or_none(result: dict[str, object]) -> float | None:
    """Coerce a result's ``score`` to ``float`` (missing -> 0.0); ``None`` if unusable."""
    try:
        return float(result.get("score", 0.0))  # type: ignore[arg-type]
    except (TypeError, ValueError, OverflowError):
        return None


def _parse_matches(payload: object) -> list[RecordingMatch]:
    """Parse an AcoustID lookup payload into ranked candidates.

    Returns ``[]`` unless *payload* is a dict with ``status == "ok"`` and a
    non-empty ``results`` list. Non-dict results and results whose ``score``
    cannot be read as a number are skipped, so a malformed entry cannot raise
    out of the lookup path. The remaining results are ordered by score,
    highest first, and capped at ``_MAX_MATCHES`` before parsing; results that
    ``_parse_one`` rejects are dropped after the cap.
    """
    if not isinstance(payload, dict) or payload.get("status") != "ok":
        return []
    results = payload.get("results")
    if not isinstance(results, list) or not results:
        return []

    # Results are returned best-score-first, but sort defensively and cap the
    # number of candidates surfaced to the editor. Sorting uses the coerced
    # float so a null or non-numeric score cannot break the comparison.
    scored: list[tuple[float, dict[str, object]]] = []
    for result in results:
        if not isinstance(result, dict):
            continue
        score = _score_or_none(result)
        if score is not None:
            scored.append((score, result))
    scored.sort(key=lambda pair: pair[0], reverse=True)

    matches: list[RecordingMatch] = []
    for _, result in scored[:_MAX_MATCHES]:
        match = _parse_one(result)
        if match is not None:
            matches.append(match)
    return matches
def _parse_one(result: dict[str, object]) -> RecordingMatch | None:
acoustid = result.get("id")
if not isinstance(acoustid, str):
return None
score = float(best.get("score", 0.0))
score = float(result.get("score", 0.0)) # type: ignore[arg-type]
recording_mbid: str | None = None
release_group_mbid: str | None = None
@@ -105,7 +132,7 @@ def _parse_best_match(payload: object) -> RecordingMatch | None:
artist: str | None = None
album: str | None = None
recordings = best.get("recordings")
recordings = result.get("recordings")
if isinstance(recordings, list) and recordings and isinstance(recordings[0], dict):
rec = recordings[0]
recording_mbid = rec.get("id") if isinstance(rec.get("id"), str) else None