diff --git a/app/api/deps.py b/app/api/deps.py index a0b18e1..d5ea6e9 100644 --- a/app/api/deps.py +++ b/app/api/deps.py @@ -15,6 +15,7 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from sqlalchemy.ext.asyncio import AsyncSession from app.application.auth_service import AuthService +from app.application.metadata_service import MetadataEnrichmentService from app.application.streaming_service import StreamingService from app.application.subsonic_auth_service import SubsonicAuthService from app.application.upload_service import UploadService @@ -35,6 +36,9 @@ from app.infrastructure.db.repositories import ( SqlAlchemyTrackRepository, SqlAlchemyUserRepository, ) +from app.infrastructure.metadata.acoustid import AcoustIdHttpClient +from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter +from app.infrastructure.metadata.tags import MutagenTagReader from app.infrastructure.sources.registry import SourceRegistry, build_source_registry from app.infrastructure.storage.provider import get_file_storage from app.workers.queue import enqueue_enrich @@ -132,8 +136,34 @@ def get_streaming_service(session: SessionDep, storage: FileStorageDep) -> Strea ) +def get_metadata_service( + session: SessionDep, storage: FileStorageDep +) -> MetadataEnrichmentService: + """Wires the §6.2 fingerprint/AcoustID adapters for read-only, inline use + (the metadata editor's "find matches" — §A7). The full pipeline (incl. + cover art) stays in the worker (`tasks/enrich_task.py`).""" + settings = get_settings() + api_key = settings.acoustid_api_key.get_secret_value() if settings.acoustid_api_key else None + acoustid = AcoustIdHttpClient( + api_key=api_key, + user_agent=settings.musicbrainz_user_agent, + api_url=settings.acoustid_api_url, + ) + return MetadataEnrichmentService( + tracks=SqlAlchemyTrackRepository(session), + artists=SqlAlchemyArtistRepository(session), + albums=SqlAlchemyAlbumRepository(session), + storage=storage, + tag_reader=MutagenTagReader(), + fingerprinter=FpcalcFingerprinter(settings.fpcalc_path), + acoustid=acoustid, + acoustid_trust_score=settings.acoustid_trust_score, + ) + + UploadServiceDep = Annotated[UploadService, Depends(get_upload_service)] StreamingServiceDep = Annotated[StreamingService, Depends(get_streaming_service)] +MetadataServiceDep = Annotated[MetadataEnrichmentService, Depends(get_metadata_service)] # -- library repository deps --------------------------------------------------- diff --git a/app/api/schemas/track.py b/app/api/schemas/track.py index f30f1be..9a79dd8 100644 --- a/app/api/schemas/track.py +++ b/app/api/schemas/track.py @@ -16,6 +16,9 @@ class TrackOut(BaseModel): duration_seconds: int | None file_format: str file_size: int + genre: str | None + year: int | None + track_number: int | None metadata_status: str metadata_error: str | None enriched_at: dt.datetime | None @@ -28,3 +31,33 @@ class TrackUpdate(BaseModel): title: str | None = None genre: str | None = None year: int | None = None + + +class MetadataMatch(BaseModel): + """One AcoustID candidate for the metadata editor's match picker (§A7).""" + + acoustid: str + score: float + recording_mbid: str | None + release_group_mbid: str | None + title: str | None + artist: str | None + album: str | None + year: int | None + + +class MetadataMatchesOut(BaseModel): + items: list[MetadataMatch] + + +class MetadataApply(BaseModel): + """Manual edits / accepted match applied via ``PUT /tracks/{id}/metadata``. + + Sets ``metadata_status = manual`` (never overwritten by auto-enrichment).""" + + title: str | None = None + artist_name: str | None = None + album_title: str | None = None + year: int | None = None + genre: str | None = None + track_number: int | None = None diff --git a/app/api/v1/tracks.py b/app/api/v1/tracks.py index d873895..39e2e8e 100644 --- a/app/api/v1/tracks.py +++ b/app/api/v1/tracks.py @@ -12,11 +12,18 @@ from app.api.deps import ( ArtistRepoDep, CurrentUser, FileStorageDep, + MetadataServiceDep, StreamUser, TrackRepoDep, ) from app.api.schemas.pagination import PagedResponse -from app.api.schemas.track import TrackOut, TrackUpdate +from app.api.schemas.track import ( + MetadataApply, + MetadataMatch, + MetadataMatchesOut, + TrackOut, + TrackUpdate, +) from app.domain.entities.album import Album from app.domain.entities.track import Artist, Track from app.domain.errors import NotFoundError @@ -41,6 +48,9 @@ async def _build_track_out( duration_seconds=t.duration_seconds, file_format=t.file_format, file_size=t.file_size, + genre=t.genre, + year=t.year, + track_number=t.track_number, metadata_status=t.metadata_status, metadata_error=t.metadata_error, enriched_at=t.enriched_at, @@ -187,8 +197,83 @@ async def enrich_metadata( @router.get("/{track_id}/metadata/matches") -async def get_metadata_matches(track_id: uuid.UUID, _: CurrentUser) -> Any: ... +async def get_metadata_matches( + track_id: uuid.UUID, + track_repo: TrackRepoDep, + metadata_service: MetadataServiceDep, + _: CurrentUser, +) -> MetadataMatchesOut: + """AcoustID candidates for the metadata editor's match picker (§A7). + + Runs the fingerprint lookup inline (single track, user-triggered) and + never mutates the track. Degrades to an empty list if fpcalc/AcoustID are + unavailable or no match is found. + """ + track = await track_repo.get_by_id(track_id) + if track is None: + raise NotFoundError(f"Track {track_id} not found.") + matches = await metadata_service.find_matches(track_id) + return MetadataMatchesOut( + items=[ + MetadataMatch( + acoustid=m.acoustid, + score=m.score, + recording_mbid=m.recording_mbid, + release_group_mbid=m.release_group_mbid, + title=m.title, + artist=m.artist, + album=m.album, + year=m.year, + ) + for m in matches + ] + ) @router.put("/{track_id}/metadata") -async def set_metadata(track_id: uuid.UUID, _: CurrentUser) -> Any: ... +async def set_metadata( + track_id: uuid.UUID, + body: MetadataApply, + track_repo: TrackRepoDep, + artist_repo: ArtistRepoDep, + album_repo: AlbumRepoDep, + _: CurrentUser, +) -> TrackOut: + """Apply manual edits or an accepted AcoustID match (§A7). Sets + ``metadata_status = manual`` — never overwritten by auto-enrichment.""" + track = await track_repo.get_by_id(track_id) + if track is None: + raise NotFoundError(f"Track {track_id} not found.") + + artist_id: uuid.UUID | None = None + if body.artist_name: + artist = await artist_repo.get_or_create(body.artist_name) + artist_id = artist.id + + album_id: uuid.UUID | None = None + if body.album_title: + album = await album_repo.get_or_create( + title=body.album_title, + artist_id=artist_id or track.artist_id, + year=body.year, + musicbrainz_id=None, + ) + album_id = album.id + + track = await track_repo.update( + track_id, + title=body.title, + genre=body.genre, + year=body.year, + artist_id=artist_id, + album_id=album_id, + track_number=body.track_number, + ) + + artist_ids = [track.artist_id] + album_ids = [track.album_id] if track.album_id else [] + artists = {a.id: a for a in await artist_repo.get_many(artist_ids)} + albums = {a.id: a for a in await album_repo.get_many(album_ids)} + + items = await _build_track_out([track], artists, albums) + return items[0] diff --git a/app/application/metadata_service.py b/app/application/metadata_service.py index fececf3..9e2db87 100644 --- a/app/application/metadata_service.py +++ b/app/application/metadata_service.py @@ -162,6 +162,29 @@ class MetadataEnrichmentService: return "No metadata match: AcoustID lookup is unavailable (no API key)." return "No metadata match found in tags or AcoustID." + async def find_matches(self, track_id: uuid.UUID) -> list[RecordingMatch]: + """AcoustID candidates for the metadata editor's match picker (§A7). + + Read-only — unlike :meth:`enrich`, never touches the track. Runs + inline (single track, user-triggered) rather than via the worker. + Degrades to ``[]`` whenever fingerprinting/AcoustID is unavailable or + the file can't be read, same as the enrichment pipeline. + """ + track = await self._tracks.get_by_id(track_id) + if track is None: + return [] + if not self._acoustid.is_available() or not self._fingerprinter.is_available(): + return [] + try: + async with self._storage.as_local_path(track.storage_uri) as path: + fingerprint = await self._fingerprinter.calculate(path) + if fingerprint is None: + return [] + return await self._acoustid.lookup_all(fingerprint) + except Exception: + log.warning("find_matches_failed", track_id=str(track_id)) + return [] + async def _read_local(self, storage_uri: str) -> AudioTags | None: try: async with self._storage.as_local_path(storage_uri) as path: diff --git a/app/domain/entities/track.py b/app/domain/entities/track.py index 10fb4d6..99eff3f 100644 --- a/app/domain/entities/track.py +++ b/app/domain/entities/track.py @@ -27,6 +27,7 @@ class Track: duration_seconds: int | None genre: str | None year: int | None + track_number: int | None metadata_status: str metadata_error: str | None enriched_at: dt.datetime | None diff --git a/app/domain/ports.py b/app/domain/ports.py index 25b8f6d..5e73f9d 100644 --- a/app/domain/ports.py +++ b/app/domain/ports.py @@ -302,10 +302,13 @@ class AudioFingerprinter(Protocol): class AcoustIdClient(Protocol): """AcoustID lookup. ``is_available`` is False without an API key (the whole fingerprint path is then skipped). ``lookup`` returns the best match or - ``None`` (no result / service down), never raising.""" + ``None`` (no result / service down), never raising. ``lookup_all`` returns + the same candidates ranked by confidence (``[]`` on no result / unavailable + / error), for the metadata editor's match picker.""" def is_available(self) -> bool: ... async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: ... + async def lookup_all(self, fingerprint: Fingerprint) -> list[RecordingMatch]: ... class CoverArtExtractor(Protocol): diff --git a/app/infrastructure/db/repositories/like_repository.py b/app/infrastructure/db/repositories/like_repository.py index d71e2e8..5d0adae 100644 --- a/app/infrastructure/db/repositories/like_repository.py +++ b/app/infrastructure/db/repositories/like_repository.py @@ -38,6 +38,7 @@ def _track_to_entity(row: TrackModel) -> Track: duration_seconds=row.duration_seconds, genre=row.genre, year=row.year, + track_number=row.track_number, metadata_status=row.metadata_status, metadata_error=row.metadata_error, enriched_at=row.enriched_at, diff --git a/app/infrastructure/db/repositories/playlist_repository.py b/app/infrastructure/db/repositories/playlist_repository.py index be5ca8f..014a889 100644 --- a/app/infrastructure/db/repositories/playlist_repository.py +++ b/app/infrastructure/db/repositories/playlist_repository.py @@ -37,6 +37,7 @@ def _track_to_entity(row: TrackModel) -> Track: duration_seconds=row.duration_seconds, genre=row.genre, year=row.year, + track_number=row.track_number, metadata_status=row.metadata_status, metadata_error=row.metadata_error, enriched_at=row.enriched_at, diff --git a/app/infrastructure/db/repositories/track_repository.py b/app/infrastructure/db/repositories/track_repository.py index 6e27481..00a5c4c 100644 --- a/app/infrastructure/db/repositories/track_repository.py +++ b/app/infrastructure/db/repositories/track_repository.py @@ -26,6 +26,7 @@ def _to_entity(row: TrackModel) -> Track: duration_seconds=row.duration_seconds, genre=row.genre, year=row.year, + track_number=row.track_number, metadata_status=row.metadata_status, metadata_error=row.metadata_error, enriched_at=row.enriched_at, @@ -162,6 +163,9 @@ class SqlAlchemyTrackRepository: title: str | None, genre: str | None, year: int | None, + artist_id: uuid.UUID | None = None, + album_id: uuid.UUID | None = None, + track_number: int | None = None, ) -> Track: row = await self._session.get(TrackModel, track_id) if row is None: @@ -172,6 +176,12 @@ class SqlAlchemyTrackRepository: row.genre = genre if year is not None: row.year = year + if artist_id is not None: + row.artist_id = artist_id + if album_id is not None: + row.album_id = album_id + if track_number is not None: + row.track_number = track_number row.metadata_status = "manual" await self._session.flush() await self._session.refresh(row) diff --git a/app/infrastructure/metadata/acoustid.py b/app/infrastructure/metadata/acoustid.py index b501acc..11fcbf4 100644 --- a/app/infrastructure/metadata/acoustid.py +++ b/app/infrastructure/metadata/acoustid.py @@ -46,6 +46,18 @@ class AcoustIdHttpClient: return bool(self._api_key) async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None: + payload = await self._lookup_raw(fingerprint) + if payload is None: + return None + return _parse_best_match(payload) + + async def lookup_all(self, fingerprint: Fingerprint) -> list[RecordingMatch]: + payload = await self._lookup_raw(fingerprint) + if payload is None: + return [] + return _parse_matches(payload) + + async def _lookup_raw(self, fingerprint: Fingerprint) -> object | None: if not self._api_key: return None try: @@ -65,13 +77,11 @@ class AcoustIdHttpClient: }, ) resp.raise_for_status() - payload = resp.json() + return resp.json() # type: ignore[no-any-return] except httpx.HTTPError, ValueError: log.warning("acoustid_lookup_failed") return None - return _parse_best_match(payload) - @classmethod async def _throttle(cls) -> None: async with cls._throttle_lock: @@ -82,22 +92,39 @@ class AcoustIdHttpClient: cls._last_call_monotonic = time.monotonic() +_MAX_MATCHES = 5 + + def _parse_best_match(payload: object) -> RecordingMatch | None: + matches = _parse_matches(payload) + return matches[0] if matches else None + + +def _parse_matches(payload: object) -> list[RecordingMatch]: if not isinstance(payload, dict) or payload.get("status") != "ok": - return None + return [] results = payload.get("results") if not isinstance(results, list) or not results: - return None + return [] - # Results are returned best-score-first; take the top scoring one. - best = max(results, key=lambda r: r.get("score", 0.0) if isinstance(r, dict) else 0.0) - if not isinstance(best, dict): - return None + # Results are returned best-score-first, but sort defensively and cap the + # number of candidates surfaced to the editor. + candidates = [r for r in results if isinstance(r, dict)] + candidates.sort(key=lambda r: r.get("score", 0.0), reverse=True) - acoustid = best.get("id") + matches: list[RecordingMatch] = [] + for result in candidates[:_MAX_MATCHES]: + match = _parse_one(result) + if match is not None: + matches.append(match) + return matches + + +def _parse_one(result: dict[str, object]) -> RecordingMatch | None: + acoustid = result.get("id") if not isinstance(acoustid, str): return None - score = float(best.get("score", 0.0)) + score = float(result.get("score", 0.0)) # type: ignore[arg-type] recording_mbid: str | None = None release_group_mbid: str | None = None @@ -105,7 +132,7 @@ def _parse_best_match(payload: object) -> RecordingMatch | None: artist: str | None = None album: str | None = None - recordings = best.get("recordings") + recordings = result.get("recordings") if isinstance(recordings, list) and recordings and isinstance(recordings[0], dict): rec = recordings[0] recording_mbid = rec.get("id") if isinstance(rec.get("id"), str) else None diff --git a/tests/test_import_service.py b/tests/test_import_service.py index 9585fe1..ffbc13a 100644 --- a/tests/test_import_service.py +++ b/tests/test_import_service.py @@ -51,6 +51,7 @@ class FakeTrackRepo: duration_seconds=None, genre=None, year=None, + track_number=None, metadata_status=str(kw["metadata_status"]), metadata_error=None, enriched_at=None, @@ -134,6 +135,7 @@ async def test_dedup_skips_already_imported() -> None: duration_seconds=None, genre=None, year=None, + track_number=None, metadata_status="pending", metadata_error=None, enriched_at=None, diff --git a/tests/test_metadata_api.py b/tests/test_metadata_api.py new file mode 100644 index 0000000..508bbaf --- /dev/null +++ b/tests/test_metadata_api.py @@ -0,0 +1,200 @@ +"""Integration tests for the metadata-editor endpoints (§A7, §1H). + +Requires a reachable Postgres; skips otherwise. +""" + +import asyncio +import os +from collections.abc import AsyncIterator +from pathlib import Path + +import pytest +from app.core.config import get_settings +from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope +from app.infrastructure.db.repositories import ( + SqlAlchemyRefreshTokenRepository, + SqlAlchemyUserRepository, +) +from asgi_lifespan import LifespanManager +from httpx import ASGITransport, AsyncClient + +pytestmark = pytest.mark.asyncio + +_db_reachable_cache: bool | None = None + + +async def _db_reachable() -> bool: + global _db_reachable_cache + if _db_reachable_cache is not None: + return _db_reachable_cache + + from sqlalchemy import text + + try: + async with asyncio.timeout(3): + async with get_engine().connect() as conn: + await conn.execute(text("SELECT 1")) + _db_reachable_cache = True + except Exception: + _db_reachable_cache = False + return _db_reachable_cache + + +@pytest.fixture +async def api(tmp_path: Path) -> AsyncIterator[AsyncClient]: + if not await _db_reachable(): + pytest.skip("Postgres not reachable — integration test skipped.") + + os.environ["MEDIA_PATH"] = str(tmp_path) + get_settings.cache_clear() + + import app.infrastructure.storage.provider as _storage_provider + + _storage_provider._storage = None + + try: + async with get_engine().begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + await conn.run_sync(Base.metadata.create_all) + + from app.application.user_service import UserService + from app.core.security import Argon2PasswordHasher + + async with session_scope() as session: + await UserService( + users=SqlAlchemyUserRepository(session), + refresh_tokens=SqlAlchemyRefreshTokenRepository(session), + hasher=Argon2PasswordHasher(), + ).create_user(username="testuser", password="testpass1", is_superuser=False) + + from app.main import create_app + + app = create_app() + async with LifespanManager(app): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + async with get_engine().begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + await dispose_engine() + finally: + _storage_provider._storage = None + os.environ.pop("MEDIA_PATH", None) + get_settings.cache_clear() + + +async def _login(api: AsyncClient) -> str: + resp = await api.post( + "/api/v1/auth/login", json={"username": "testuser", "password": "testpass1"} + ) + assert resp.status_code == 200 + return str(resp.json()["access_token"]) + + +async def _upload(api: AsyncClient, token: str, *, name: str = "song.mp3") -> str: + audio = b"fake audio bytes for metadata test" * 10 + resp = await api.post( + "/api/v1/upload", + files={"file": (name, audio, "audio/mpeg")}, + headers={"Authorization": f"Bearer {token}"}, + ) + assert resp.status_code == 200, resp.text + return str(resp.json()["track_id"]) + + +async def test_track_out_includes_genre_year_track_number(api: AsyncClient) -> None: + token = await _login(api) + track_id = await _upload(api, token) + + resp = await api.get( + f"/api/v1/tracks/{track_id}", headers={"Authorization": f"Bearer {token}"} + ) + assert resp.status_code == 200, resp.text + body = resp.json() + assert "genre" in body + assert "year" in body + assert "track_number" in body + + +async def test_metadata_matches_degrades_without_acoustid(api: AsyncClient) -> None: + # No ACOUSTID_API_KEY / fpcalc configured in the test environment — the + # endpoint must degrade to an empty list, not error. + token = await _login(api) + track_id = await _upload(api, token) + + resp = await api.get( + f"/api/v1/tracks/{track_id}/metadata/matches", + headers={"Authorization": f"Bearer {token}"}, + ) + assert resp.status_code == 200, resp.text + assert resp.json() == {"items": []} + + +async def test_metadata_matches_not_found(api: AsyncClient) -> None: + token = await _login(api) + resp = await api.get( + "/api/v1/tracks/00000000-0000-0000-0000-000000000000/metadata/matches", + headers={"Authorization": f"Bearer {token}"}, + ) + assert resp.status_code == 404 + + +async def test_apply_metadata_updates_fields_and_sets_manual(api: AsyncClient) -> None: + token = await _login(api) + track_id = await _upload(api, token) + headers = {"Authorization": f"Bearer {token}"} + + resp = await api.put( + f"/api/v1/tracks/{track_id}/metadata", + json={ + "title": "New Title", + "artist_name": "New Artist", + "album_title": "New Album", + "year": 1999, + "genre": "Rock", + "track_number": 3, + }, + headers=headers, + ) + assert resp.status_code == 200, resp.text + body = resp.json() + assert body["title"] == "New Title" + assert body["artist_name"] == "New Artist" + assert body["album_title"] == "New Album" + assert body["year"] == 1999 + assert body["genre"] == "Rock" + assert body["track_number"] == 3 + assert body["metadata_status"] == "manual" + + # Re-fetch to confirm persistence. + again = await api.get(f"/api/v1/tracks/{track_id}", headers=headers) + assert again.status_code == 200 + assert again.json()["title"] == "New Title" + assert again.json()["metadata_status"] == "manual" + + +async def test_apply_metadata_partial_update(api: AsyncClient) -> None: + token = await _login(api) + track_id = await _upload(api, token) + headers = {"Authorization": f"Bearer {token}"} + + resp = await api.put( + f"/api/v1/tracks/{track_id}/metadata", + json={"genre": "Jazz"}, + headers=headers, + ) + assert resp.status_code == 200, resp.text + body = resp.json() + assert body["genre"] == "Jazz" + assert body["metadata_status"] == "manual" + + +async def test_apply_metadata_not_found(api: AsyncClient) -> None: + token = await _login(api) + resp = await api.put( + "/api/v1/tracks/00000000-0000-0000-0000-000000000000/metadata", + json={"title": "x"}, + headers={"Authorization": f"Bearer {token}"}, + ) + assert resp.status_code == 404 diff --git a/tests/test_metadata_service.py b/tests/test_metadata_service.py index 727efa6..2d6d7aa 100644 --- a/tests/test_metadata_service.py +++ b/tests/test_metadata_service.py @@ -38,6 +38,7 @@ def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Trac duration_seconds=None, genre=None, year=None, + track_number=None, metadata_status=metadata_status, metadata_error=None, enriched_at=None, @@ -165,6 +166,10 @@ class FakeAcoustId: self.calls += 1 return self._match + async def lookup_all(self, fingerprint: Fingerprint) -> list[RecordingMatch]: + self.calls += 1 + return [self._match] if self._match is not None else [] + def _service( *, diff --git a/tests/test_real_file_enrichment.py b/tests/test_real_file_enrichment.py index 96dc11a..023738a 100644 --- a/tests/test_real_file_enrichment.py +++ b/tests/test_real_file_enrichment.py @@ -145,6 +145,7 @@ def _pending_track() -> Track: duration_seconds=None, genre=None, year=None, + track_number=None, metadata_status="pending", metadata_error=None, enriched_at=None,