feat(enrichment): tag-first metadata pipeline (§1D)
Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint
fingerprint → AcoustID lookup. Well-tagged files get correct
artist/album/title offline; the rest are identified via AcoustID
(which also yields a MusicBrainz recording id in one call).
- domain: AudioTags/Fingerprint/RecordingMatch value objects; ports
AudioTagReader, AudioFingerprinter, AcoustIdClient; TrackRepository
.apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create
- infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter,
AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled)
- application: MetadataEnrichmentService — tags preferred, AcoustID fills
gaps; resolves artist/album; status enriched/failed; skips manual;
every external step wrapped (graceful degradation)
- workers: enrich_task registered; enqueue_enrich is best-effort and
deferred so the caller's txn commits before the worker reads the row
- wiring: upload enqueues after add; import returns imported_ids and
enqueues post-commit (mid-scan would race the worker); manual
POST /tracks/{id}/metadata/enrich endpoint
- deps: add mutagen (fpcalc/ffmpeg already in the image)
Tests: metadata service orchestration, AcoustID parser, tag helpers.
125 passed; mypy strict + ruff clean.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -37,6 +37,7 @@ from app.infrastructure.db.repositories import (
|
|||||||
)
|
)
|
||||||
from app.infrastructure.sources.registry import SourceRegistry, build_source_registry
|
from app.infrastructure.sources.registry import SourceRegistry, build_source_registry
|
||||||
from app.infrastructure.storage.provider import get_file_storage
|
from app.infrastructure.storage.provider import get_file_storage
|
||||||
|
from app.workers.queue import enqueue_enrich
|
||||||
|
|
||||||
|
|
||||||
async def get_session() -> AsyncIterator[AsyncSession]:
|
async def get_session() -> AsyncIterator[AsyncSession]:
|
||||||
@@ -120,6 +121,7 @@ def get_upload_service(session: SessionDep, storage: FileStorageDep) -> UploadSe
|
|||||||
artists=SqlAlchemyArtistRepository(session),
|
artists=SqlAlchemyArtistRepository(session),
|
||||||
storage=storage,
|
storage=storage,
|
||||||
tmp_dir=settings.upload_tmp_dir,
|
tmp_dir=settings.upload_tmp_dir,
|
||||||
|
enqueue_enrich=enqueue_enrich,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+13
-1
@@ -11,6 +11,7 @@ from app.api.schemas.track import TrackOut, TrackUpdate
|
|||||||
from app.domain.entities.album import Album
|
from app.domain.entities.album import Album
|
||||||
from app.domain.entities.track import Artist, Track
|
from app.domain.entities.track import Artist, Track
|
||||||
from app.domain.errors import NotFoundError
|
from app.domain.errors import NotFoundError
|
||||||
|
from app.workers.queue import enqueue
|
||||||
|
|
||||||
router = APIRouter(prefix="/tracks", tags=["tracks"])
|
router = APIRouter(prefix="/tracks", tags=["tracks"])
|
||||||
|
|
||||||
@@ -147,7 +148,18 @@ async def get_track_cover(track_id: uuid.UUID, _: CurrentUser) -> Any: ...
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/{track_id}/metadata/enrich")
|
@router.post("/{track_id}/metadata/enrich")
|
||||||
async def enrich_metadata(track_id: uuid.UUID, _: CurrentUser) -> Any: ...
|
async def enrich_metadata(
    track_id: uuid.UUID,
    track_repo: TrackRepoDep,
    _: CurrentUser,
) -> dict[str, str]:
    """Queue a metadata-enrichment job for one track.

    The track is looked up first; ``NotFoundError`` is raised when it does not
    exist. Otherwise an ``enrich_track`` job is enqueued and the track id is
    returned together with the job id. No enrichment work is done in this
    handler itself.

    NOTE(review): the previous docstring said a 503 is returned when the queue
    is down; that mapping is not visible in this function — confirm it is
    handled by ``enqueue`` or an exception handler.
    """
    if await track_repo.get_by_id(track_id) is None:
        raise NotFoundError(f"Track {track_id} not found.")

    track_key = str(track_id)
    queued_job = await enqueue("enrich_track", track_id=track_key)
    return {"track_id": track_key, "job_id": queued_job}
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{track_id}/metadata/matches")
|
@router.get("/{track_id}/metadata/matches")
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ must not abort the whole scan (graceful degradation).
|
|||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import uuid
|
import uuid
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from app.core.logging import get_logger
|
from app.core.logging import get_logger
|
||||||
from app.domain.ports import ArtistRepository, FileStorage, IndexableSource, TrackRepository
|
from app.domain.ports import ArtistRepository, FileStorage, IndexableSource, TrackRepository
|
||||||
@@ -27,6 +27,9 @@ class ImportSummary:
|
|||||||
imported: int
|
imported: int
|
||||||
skipped: int
|
skipped: int
|
||||||
failed: int
|
failed: int
|
||||||
|
# IDs of freshly imported tracks, for the caller to enqueue enrichment
|
||||||
|
# *after* its transaction commits (enqueuing mid-scan would race the worker).
|
||||||
|
imported_ids: list[uuid.UUID] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
class LibraryImportService:
|
class LibraryImportService:
|
||||||
@@ -44,7 +47,8 @@ class LibraryImportService:
|
|||||||
async def scan_and_import(
|
async def scan_and_import(
|
||||||
self, source: IndexableSource, *, added_by: uuid.UUID | None
|
self, source: IndexableSource, *, added_by: uuid.UUID | None
|
||||||
) -> ImportSummary:
|
) -> ImportSummary:
|
||||||
seen = imported = skipped = failed = 0
|
seen = skipped = failed = 0
|
||||||
|
imported_ids: list[uuid.UUID] = []
|
||||||
for file in source.scan():
|
for file in source.scan():
|
||||||
seen += 1
|
seen += 1
|
||||||
try:
|
try:
|
||||||
@@ -52,13 +56,18 @@ class LibraryImportService:
|
|||||||
if existing is not None:
|
if existing is not None:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
await self._import_one(source.name, file, added_by)
|
track_id = await self._import_one(source.name, file, added_by)
|
||||||
imported += 1
|
imported_ids.append(track_id)
|
||||||
except Exception:
|
except Exception:
|
||||||
failed += 1
|
failed += 1
|
||||||
log.warning("import_file_failed", source=source.name, source_id=file.source_id)
|
log.warning("import_file_failed", source=source.name, source_id=file.source_id)
|
||||||
summary = ImportSummary(
|
summary = ImportSummary(
|
||||||
source=source.name, seen=seen, imported=imported, skipped=skipped, failed=failed
|
source=source.name,
|
||||||
|
seen=seen,
|
||||||
|
imported=len(imported_ids),
|
||||||
|
skipped=skipped,
|
||||||
|
failed=failed,
|
||||||
|
imported_ids=imported_ids,
|
||||||
)
|
)
|
||||||
log.info(
|
log.info(
|
||||||
"import_complete",
|
"import_complete",
|
||||||
@@ -72,7 +81,7 @@ class LibraryImportService:
|
|||||||
|
|
||||||
async def _import_one(
|
async def _import_one(
|
||||||
self, source_name: str, file: SourceFile, added_by: uuid.UUID | None
|
self, source_name: str, file: SourceFile, added_by: uuid.UUID | None
|
||||||
) -> None:
|
) -> uuid.UUID:
|
||||||
track_id = uuid.uuid4()
|
track_id = uuid.uuid4()
|
||||||
key = f"tracks/{str(track_id)[:2]}/{track_id}.{file.file_format}"
|
key = f"tracks/{str(track_id)[:2]}/{track_id}.{file.file_format}"
|
||||||
await self._storage.save_file(key, file.path)
|
await self._storage.save_file(key, file.path)
|
||||||
@@ -94,3 +103,4 @@ class LibraryImportService:
|
|||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
await self._storage.delete(key)
|
await self._storage.delete(key)
|
||||||
raise
|
raise
|
||||||
|
return track_id
|
||||||
|
|||||||
@@ -0,0 +1,174 @@
|
|||||||
|
"""MetadataEnrichmentService — the §6.2 pipeline orchestrator.
|
||||||
|
|
||||||
|
Order (tag-first): embedded tags → Chromaprint fingerprint → AcoustID lookup.
|
||||||
|
Tags fix the common well-tagged case offline; AcoustID identifies the rest and
|
||||||
|
supplies a MusicBrainz id. The result updates the track and sets
|
||||||
|
``metadata_status`` to ``enriched`` (identity found) or ``failed`` (nothing).
|
||||||
|
|
||||||
|
Invariants (plan §6.2, CLAUDE.md):
|
||||||
|
- **Never touch ``manual``** — a user-edited track is returned untouched.
|
||||||
|
- **Graceful degradation** — every external step is wrapped; one failure (no
|
||||||
|
fpcalc, no API key, service down) degrades the result, never crashes.
|
||||||
|
- **Idempotent** — re-running only fills gaps; ``apply_enrichment`` never erases.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
from app.domain.entities.metadata import AudioTags, RecordingMatch
|
||||||
|
from app.domain.ports import (
|
||||||
|
AcoustIdClient,
|
||||||
|
AlbumRepository,
|
||||||
|
ArtistRepository,
|
||||||
|
AudioFingerprinter,
|
||||||
|
AudioTagReader,
|
||||||
|
FileStorage,
|
||||||
|
TrackRepository,
|
||||||
|
)
|
||||||
|
|
||||||
|
log = get_logger(__name__)
|
||||||
|
|
||||||
|
_UNKNOWN_ARTIST = "Unknown Artist"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class EnrichmentResult:
|
||||||
|
track_id: uuid.UUID
|
||||||
|
status: str # "enriched" | "failed" | "skipped"
|
||||||
|
matched_mbid: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataEnrichmentService:
    """Runs the enrichment pipeline for one track and persists the result.

    Embedded tags are read first, then the file is fingerprinted and looked up
    through AcoustID. Tag values win over AcoustID values when both are
    present. Tracks whose ``metadata_status`` is ``"manual"`` are never
    modified. Failures in the tag or identification step are logged and
    degrade the result instead of propagating.
    """

    def __init__(
        self,
        *,
        tracks: TrackRepository,
        artists: ArtistRepository,
        albums: AlbumRepository,
        storage: FileStorage,
        tag_reader: AudioTagReader,
        fingerprinter: AudioFingerprinter,
        acoustid: AcoustIdClient,
    ) -> None:
        self._tracks = tracks
        self._artists = artists
        self._albums = albums
        self._storage = storage
        self._tag_reader = tag_reader
        self._fingerprinter = fingerprinter
        self._acoustid = acoustid

    async def enrich(self, track_id: uuid.UUID) -> EnrichmentResult:
        """Enrich the track identified by ``track_id``.

        Returns a result with status ``"skipped"`` when the track does not
        exist or is marked ``"manual"``. Otherwise the merged metadata is
        written through ``TrackRepository.apply_enrichment`` and the status is
        ``"enriched"`` when a real artist, an album, or a MusicBrainz
        recording id was found, and ``"failed"`` when none was.
        """
        track = await self._tracks.get_by_id(track_id)
        if track is None:
            log.info("enrich_track_missing", track_id=str(track_id))
            return EnrichmentResult(track_id=track_id, status="skipped")
        if track.metadata_status == "manual":
            log.info("enrich_skip_manual", track_id=str(track_id))
            return EnrichmentResult(track_id=track_id, status="skipped")

        tags = await self._read_local(track.storage_uri)
        match = await self._identify(track.storage_uri)

        # Merge sources: prefer embedded tags, fall back to the AcoustID match.
        # ``title`` falls back to the existing track title; the other fields
        # stay None when neither source has them.
        tag_title = tags.title if tags else None
        tag_artist = tags.artist if tags else None
        tag_album = tags.album if tags else None
        title = _opt_str(tag_title, match.title if match else None) or track.title
        artist_name = _opt_str(tag_artist, match.artist if match else None)
        album_title = _opt_str(tag_album, match.album if match else None)
        year = _first_int(tags.year if tags else None, match.year if match else None)
        genre = tags.genre if tags else None
        track_number = tags.track_number if tags else None
        duration = _first_int(
            tags.duration_seconds if tags else None,
            track.duration_seconds,
        )
        bitrate = tags.bitrate if tags else None
        mbid = match.recording_mbid if match else None
        acoustid_id = match.acoustid if match else None

        artist_id = await self._resolve_artist(artist_name, fallback=track.artist_id)
        # ``mbid`` is a *recording* id. Storing it as the album's MusicBrainz
        # id would be wrong for every track on the album, and because album
        # backfill never overwrites, the wrong value would stick. No
        # album-level id is available on ``RecordingMatch``, so none is passed.
        album_id = await self._resolve_album(
            album_title, artist_id=artist_id, year=year, mbid=None
        )

        # The placeholder artist name carries no identity: ``_resolve_artist``
        # already ignores it, so it must not count as "identified" either.
        artist_known = bool(artist_name) and artist_name != _UNKNOWN_ARTIST
        identified = artist_known or album_id is not None or mbid is not None
        status = "enriched" if identified else "failed"

        await self._tracks.apply_enrichment(
            track_id,
            title=title,
            artist_id=artist_id,
            album_id=album_id,
            genre=genre,
            year=year,
            track_number=track_number,
            duration_seconds=duration,
            bitrate=bitrate,
            acoustid_fingerprint=acoustid_id,
            musicbrainz_id=mbid,
            metadata_status=status,
        )
        log.info("enrich_complete", track_id=str(track_id), status=status, mbid=mbid)
        return EnrichmentResult(track_id=track_id, status=status, matched_mbid=mbid)

    async def _read_local(self, storage_uri: str) -> AudioTags | None:
        """Read embedded tags from the stored file; ``None`` on any failure."""
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                return await self._tag_reader.read(path)
        except Exception:
            # Best-effort step: a broken file or storage error must not abort
            # the whole enrichment run.
            log.warning("enrich_tag_step_failed", storage_uri=storage_uri)
            return None

    async def _identify(self, storage_uri: str) -> RecordingMatch | None:
        """Fingerprint the stored file and look it up via AcoustID.

        Returns ``None`` when the AcoustID client or the fingerprinter reports
        itself unavailable, when no fingerprint could be calculated, or when
        any step raises.
        """
        if not self._acoustid.is_available() or not self._fingerprinter.is_available():
            return None
        try:
            async with self._storage.as_local_path(storage_uri) as path:
                fingerprint = await self._fingerprinter.calculate(path)
            if fingerprint is None:
                return None
            return await self._acoustid.lookup(fingerprint)
        except Exception:
            # Best-effort step, same policy as the tag step.
            log.warning("enrich_identify_step_failed", storage_uri=storage_uri)
            return None

    async def _resolve_artist(self, name: str | None, *, fallback: uuid.UUID) -> uuid.UUID:
        """Map an artist name to an artist id, creating the artist if needed.

        An empty name or the ``_UNKNOWN_ARTIST`` placeholder yields ``fallback``.
        """
        if not name or name == _UNKNOWN_ARTIST:
            return fallback
        artist = await self._artists.get_or_create(name)
        return artist.id

    async def _resolve_album(
        self,
        title: str | None,
        *,
        artist_id: uuid.UUID,
        year: int | None,
        mbid: str | None,
    ) -> uuid.UUID | None:
        """Map an album title to an album id; ``None`` when there is no title.

        ``mbid`` is forwarded as the album's ``musicbrainz_id`` and must
        therefore be an album-level identifier, not a recording id.
        """
        if not title:
            return None
        album = await self._albums.get_or_create(
            title=title,
            artist_id=artist_id,
            year=year,
            musicbrainz_id=mbid,
        )
        return album.id
|
||||||
|
|
||||||
|
|
||||||
|
def _opt_str(*values: str | None) -> str | None:
|
||||||
|
for value in values:
|
||||||
|
if value:
|
||||||
|
return value
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _first_int(*values: int | None) -> int | None:
|
||||||
|
for value in values:
|
||||||
|
if value is not None:
|
||||||
|
return value
|
||||||
|
return None
|
||||||
@@ -5,6 +5,7 @@ import hashlib
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Protocol
|
from typing import Protocol
|
||||||
@@ -14,6 +15,8 @@ import anyio
|
|||||||
from app.domain.entities.user import User
|
from app.domain.entities.user import User
|
||||||
from app.domain.ports import ArtistRepository, FileStorage, TrackRepository
|
from app.domain.ports import ArtistRepository, FileStorage, TrackRepository
|
||||||
|
|
||||||
|
EnrichEnqueuer = Callable[[uuid.UUID], Awaitable[None]]
|
||||||
|
|
||||||
|
|
||||||
class UploadFileProtocol(Protocol):
|
class UploadFileProtocol(Protocol):
|
||||||
filename: str | None
|
filename: str | None
|
||||||
@@ -49,11 +52,13 @@ class UploadService:
|
|||||||
artists: ArtistRepository,
|
artists: ArtistRepository,
|
||||||
storage: FileStorage,
|
storage: FileStorage,
|
||||||
tmp_dir: Path | None = None,
|
tmp_dir: Path | None = None,
|
||||||
|
enqueue_enrich: EnrichEnqueuer | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._tracks = tracks
|
self._tracks = tracks
|
||||||
self._artists = artists
|
self._artists = artists
|
||||||
self._storage = storage
|
self._storage = storage
|
||||||
self._tmp_dir = tmp_dir
|
self._tmp_dir = tmp_dir
|
||||||
|
self._enqueue_enrich = enqueue_enrich
|
||||||
|
|
||||||
async def handle_upload(
|
async def handle_upload(
|
||||||
self,
|
self,
|
||||||
@@ -105,7 +110,8 @@ class UploadService:
|
|||||||
await self._storage.delete(key)
|
await self._storage.delete(key)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# TODO(1D): enqueue metadata enrichment task
|
if self._enqueue_enrich is not None:
|
||||||
|
await self._enqueue_enrich(track.id)
|
||||||
|
|
||||||
return UploadResult(
|
return UploadResult(
|
||||||
track_id=track.id,
|
track_id=track.id,
|
||||||
|
|||||||
@@ -73,9 +73,15 @@ class Settings(BaseSettings):
|
|||||||
# -- external services (all optional; graceful degradation) ----------
|
# -- external services (all optional; graceful degradation) ----------
|
||||||
ml_service_url: str | None = None
|
ml_service_url: str | None = None
|
||||||
acoustid_api_key: SecretStr | None = None
|
acoustid_api_key: SecretStr | None = None
|
||||||
|
acoustid_api_url: str = "https://api.acoustid.org/v2/lookup"
|
||||||
musicbrainz_user_agent: str = "mcma-backend/0.1.0 ( https://github.com/your/repo )"
|
musicbrainz_user_agent: str = "mcma-backend/0.1.0 ( https://github.com/your/repo )"
|
||||||
youtube_cookies_path: Path | None = None
|
youtube_cookies_path: Path | None = None
|
||||||
|
|
||||||
|
# -- enrichment -------------------------------------------------------
|
||||||
|
# ``fpcalc`` (Chromaprint) binary; resolved on PATH by default. The Docker
|
||||||
|
# image installs it via libchromaprint-tools.
|
||||||
|
fpcalc_path: str = "fpcalc"
|
||||||
|
|
||||||
@field_validator("database_url")
|
@field_validator("database_url")
|
||||||
@classmethod
|
@classmethod
|
||||||
def _require_async_driver(cls, v: str) -> str:
|
def _require_async_driver(cls, v: str) -> str:
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
from app.domain.entities.album import Album
|
from app.domain.entities.album import Album
|
||||||
from app.domain.entities.history import PlayHistoryEntry
|
from app.domain.entities.history import PlayHistoryEntry
|
||||||
from app.domain.entities.like import Like
|
from app.domain.entities.like import Like
|
||||||
|
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
|
||||||
from app.domain.entities.playlist import Playlist
|
from app.domain.entities.playlist import Playlist
|
||||||
from app.domain.entities.storage import ObjectStat
|
from app.domain.entities.storage import ObjectStat
|
||||||
from app.domain.entities.track import Artist, Track
|
from app.domain.entities.track import Artist, Track
|
||||||
@@ -11,11 +12,14 @@ from app.domain.entities.user import Credentials, SubsonicCredentials, User
|
|||||||
__all__ = [
|
__all__ = [
|
||||||
"Album",
|
"Album",
|
||||||
"Artist",
|
"Artist",
|
||||||
|
"AudioTags",
|
||||||
"Credentials",
|
"Credentials",
|
||||||
|
"Fingerprint",
|
||||||
"Like",
|
"Like",
|
||||||
"ObjectStat",
|
"ObjectStat",
|
||||||
"PlayHistoryEntry",
|
"PlayHistoryEntry",
|
||||||
"Playlist",
|
"Playlist",
|
||||||
|
"RecordingMatch",
|
||||||
"SubsonicCredentials",
|
"SubsonicCredentials",
|
||||||
"Track",
|
"Track",
|
||||||
"User",
|
"User",
|
||||||
|
|||||||
@@ -0,0 +1,53 @@
|
|||||||
|
"""Value objects for the metadata-enrichment pipeline (plan §6.2).
|
||||||
|
|
||||||
|
Pure data carriers between the enrichment service and its adapters (tag reader,
|
||||||
|
fingerprinter, AcoustID). No framework imports — these cross the domain boundary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class AudioTags:
|
||||||
|
"""Embedded tags read from the file itself (ID3 / Vorbis / MP4 …).
|
||||||
|
|
||||||
|
Every field is optional — files are tagged inconsistently. The reader fills
|
||||||
|
what it can and leaves the rest ``None`` for downstream identification.
|
||||||
|
"""
|
||||||
|
|
||||||
|
title: str | None = None
|
||||||
|
artist: str | None = None
|
||||||
|
album: str | None = None
|
||||||
|
album_artist: str | None = None
|
||||||
|
genre: str | None = None
|
||||||
|
year: int | None = None
|
||||||
|
track_number: int | None = None
|
||||||
|
duration_seconds: int | None = None
|
||||||
|
bitrate: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class Fingerprint:
|
||||||
|
"""Chromaprint fingerprint plus the decoded duration (both needed by AcoustID)."""
|
||||||
|
|
||||||
|
fingerprint: str
|
||||||
|
duration_seconds: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class RecordingMatch:
|
||||||
|
"""A single AcoustID result, flattened to the fields enrichment cares about.
|
||||||
|
|
||||||
|
``acoustid`` is the stable AcoustID identifier (a UUID) — used as the
|
||||||
|
dedup key persisted on ``track.acoustid_fingerprint`` (fits the 64-char
|
||||||
|
column; the raw fingerprint does not). ``recording_mbid`` is the MusicBrainz
|
||||||
|
recording id when present.
|
||||||
|
"""
|
||||||
|
|
||||||
|
acoustid: str
|
||||||
|
score: float
|
||||||
|
recording_mbid: str | None = None
|
||||||
|
title: str | None = None
|
||||||
|
artist: str | None = None
|
||||||
|
album: str | None = None
|
||||||
|
year: int | None = None
|
||||||
@@ -14,11 +14,14 @@ from typing import Protocol
|
|||||||
|
|
||||||
from app.domain.entities import (
|
from app.domain.entities import (
|
||||||
Album,
|
Album,
|
||||||
|
AudioTags,
|
||||||
Credentials,
|
Credentials,
|
||||||
|
Fingerprint,
|
||||||
Like,
|
Like,
|
||||||
ObjectStat,
|
ObjectStat,
|
||||||
PlayHistoryEntry,
|
PlayHistoryEntry,
|
||||||
Playlist,
|
Playlist,
|
||||||
|
RecordingMatch,
|
||||||
SubsonicCredentials,
|
SubsonicCredentials,
|
||||||
User,
|
User,
|
||||||
)
|
)
|
||||||
@@ -153,9 +156,38 @@ class TrackRepository(Protocol):
|
|||||||
genre: str | None,
|
genre: str | None,
|
||||||
year: int | None,
|
year: int | None,
|
||||||
) -> Track: ...
|
) -> Track: ...
|
||||||
|
async def apply_enrichment(
    self,
    track_id: uuid.UUID,
    *,
    title: str,
    artist_id: uuid.UUID,
    album_id: uuid.UUID | None,
    genre: str | None,
    year: int | None,
    track_number: int | None,
    duration_seconds: int | None,
    bitrate: int | None,
    acoustid_fingerprint: str | None,
    musicbrainz_id: str | None,
    metadata_status: str,
) -> Track:
    """Store the outcome of an automatic enrichment run.

    ``title``, ``artist_id`` and ``metadata_status`` are written
    unconditionally. Each nullable field is written only when the supplied
    value is not ``None``, so running enrichment again cannot erase data
    found earlier. This must not be called for tracks whose
    ``metadata_status`` is ``'manual'``.
    """
|
||||||
|
|
||||||
|
|
||||||
class AlbumRepository(Protocol):
|
class AlbumRepository(Protocol):
|
||||||
|
async def get_or_create(
    self,
    *,
    title: str,
    artist_id: uuid.UUID,
    year: int | None,
    musicbrainz_id: str | None,
) -> Album:
    """Return the album for ``title``/``artist_id``, creating it when absent."""
|
||||||
async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ...
|
async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ...
|
||||||
async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ...
|
async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ...
|
||||||
async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ...
|
async def count(self, *, artist_id: uuid.UUID | None, q: str | None) -> int: ...
|
||||||
@@ -240,3 +272,28 @@ class IndexableSource(SourceBackend, Protocol):
|
|||||||
"""A source that enumerates files already on disk (e.g. the local folder)."""
|
"""A source that enumerates files already on disk (e.g. the local folder)."""
|
||||||
|
|
||||||
def scan(self) -> Iterator[SourceFile]: ...
|
def scan(self) -> Iterator[SourceFile]: ...
|
||||||
|
|
||||||
|
|
||||||
|
# -- metadata enrichment (plan §6.2) -----------------------------------------
|
||||||
|
class AudioTagReader(Protocol):
    """Port for reading the tags embedded in a local audio file.

    Implementations must not raise (graceful degradation): ``None`` is
    returned only when the file cannot be parsed at all.
    """

    async def read(self, path: Path) -> AudioTags | None:
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class AudioFingerprinter(Protocol):
    """Port wrapping Chromaprint (``fpcalc``).

    ``is_available`` tells whether the binary is present. ``calculate`` yields
    ``None`` on any failure.
    """

    def is_available(self) -> bool:
        ...

    async def calculate(self, path: Path) -> Fingerprint | None:
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class AcoustIdClient(Protocol):
    """Port for AcoustID lookups.

    Without an API key ``is_available`` is False, and callers then skip the
    whole fingerprint path. ``lookup`` never raises: it returns the best match,
    or ``None`` when there is no result or the service is down.
    """

    def is_available(self) -> bool:
        ...

    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
        ...
|
||||||
|
|||||||
@@ -27,6 +27,42 @@ class SqlAlchemyAlbumRepository:
|
|||||||
def __init__(self, session: AsyncSession) -> None:
|
def __init__(self, session: AsyncSession) -> None:
|
||||||
self._session = session
|
self._session = session
|
||||||
|
|
||||||
|
async def get_or_create(
    self,
    *,
    title: str,
    artist_id: uuid.UUID,
    year: int | None,
    musicbrainz_id: str | None,
) -> Album:
    """Find the album matching ``(title, artist_id)`` or insert a new one.

    For an existing row, ``year`` and ``musicbrainz_id`` are filled in only
    when the row has no value and the caller supplies one; values already
    stored are left alone.
    """
    lookup = select(AlbumModel).where(
        AlbumModel.title == title,
        AlbumModel.artist_id == artist_id,
    )
    album_row = (await self._session.execute(lookup)).scalar_one_or_none()

    if album_row is not None:
        # Gap-fill only: never overwrite a value that is already stored.
        if album_row.year is None and year is not None:
            album_row.year = year
        if album_row.musicbrainz_id is None and musicbrainz_id is not None:
            album_row.musicbrainz_id = musicbrainz_id
    else:
        album_row = AlbumModel(
            title=title,
            artist_id=artist_id,
            year=year,
            musicbrainz_id=musicbrainz_id,
        )
        self._session.add(album_row)

    await self._session.flush()
    await self._session.refresh(album_row)
    return _to_entity(album_row)
|
||||||
|
|
||||||
async def get_by_id(self, album_id: uuid.UUID) -> Album | None:
|
async def get_by_id(self, album_id: uuid.UUID) -> Album | None:
|
||||||
row = await self._session.get(AlbumModel, album_id)
|
row = await self._session.get(AlbumModel, album_id)
|
||||||
return _to_entity(row) if row is not None else None
|
return _to_entity(row) if row is not None else None
|
||||||
|
|||||||
@@ -173,3 +173,47 @@ class SqlAlchemyTrackRepository:
|
|||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
await self._session.refresh(row)
|
await self._session.refresh(row)
|
||||||
return _to_entity(row)
|
return _to_entity(row)
|
||||||
|
|
||||||
|
async def apply_enrichment(
    self,
    track_id: uuid.UUID,
    *,
    title: str,
    artist_id: uuid.UUID,
    album_id: uuid.UUID | None,
    genre: str | None,
    year: int | None,
    track_number: int | None,
    duration_seconds: int | None,
    bitrate: int | None,
    acoustid_fingerprint: str | None,
    musicbrainz_id: str | None,
    metadata_status: str,
) -> Track:
    """Write enrichment results onto the track row and return the entity.

    Raises ``NotFoundError`` when no row exists for ``track_id``. ``title``,
    ``artist_id`` and ``metadata_status`` are always overwritten; every other
    field is assigned only when the supplied value is not ``None``.
    """
    track_row = await self._session.get(TrackModel, track_id)
    if track_row is None:
        raise NotFoundError(f"Track {track_id} not found.")

    # Identity and status are authoritative for an enrichment run.
    track_row.title = title
    track_row.artist_id = artist_id
    track_row.metadata_status = metadata_status

    # Nullable extras: a None means "nothing found this time", so the stored
    # value is kept rather than erased.
    gap_fill: dict[str, object] = {
        "album_id": album_id,
        "genre": genre,
        "year": year,
        "track_number": track_number,
        "duration_seconds": duration_seconds,
        "bitrate": bitrate,
        "acoustid_fingerprint": acoustid_fingerprint,
        "musicbrainz_id": musicbrainz_id,
    }
    for column, value in gap_fill.items():
        if value is not None:
            setattr(track_row, column, value)

    await self._session.flush()
    await self._session.refresh(track_row)
    return _to_entity(track_row)
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
"""Metadata-enrichment adapters: tag reader, fingerprinter, AcoustID client."""
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
"""AcoustIdHttpClient — identifies a recording from its Chromaprint fingerprint.
|
||||||
|
|
||||||
|
One ``/v2/lookup`` call with ``meta=recordings+releasegroups`` returns the
|
||||||
|
AcoustID id, the MusicBrainz recording id, and canonical title/artist/album —
|
||||||
|
metadata that itself originates from MusicBrainz, so a separate MB call is not
|
||||||
|
needed for Phase 1 (plan §6.2 steps 2-3 collapsed into one request).
|
||||||
|
|
||||||
|
Graceful degradation: no API key → ``is_available()`` is False and the whole
|
||||||
|
fingerprint path is skipped; any network/parse error → ``lookup`` returns
|
||||||
|
``None``. A small inter-call delay keeps us within AcoustID's rate limit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
from app.domain.entities.metadata import Fingerprint, RecordingMatch
|
||||||
|
|
||||||
|
log = get_logger(__name__)
|
||||||
|
|
||||||
|
_DEFAULT_URL = "https://api.acoustid.org/v2/lookup"
|
||||||
|
_TIMEOUT_SECONDS = 10.0
|
||||||
|
_MIN_INTERVAL_SECONDS = 0.34 # AcoustID allows ~3 req/s; stay polite
|
||||||
|
|
||||||
|
|
||||||
|
class AcoustIdHttpClient:
    """Implements :class:`app.domain.ports.AcoustIdClient`."""

    # Class-level state: the lock and timestamp are shared by every instance,
    # so the minimum spacing between calls applies across all clients.
    _throttle_lock = asyncio.Lock()
    _last_call_monotonic = 0.0

    def __init__(
        self,
        *,
        api_key: str | None,
        user_agent: str,
        api_url: str = _DEFAULT_URL,
    ) -> None:
        self._api_key = api_key
        self._user_agent = user_agent
        self._api_url = api_url

    def is_available(self) -> bool:
        """Return True when an API key is configured."""
        return bool(self._api_key)

    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
        """Look up ``fingerprint`` and return the parsed best match.

        Returns ``None`` when no API key is configured, when the HTTP request
        or JSON decoding fails, or when the payload cannot be parsed. Parsing
        is guarded as well, so an unexpectedly shaped payload degrades to
        ``None`` instead of raising.
        """
        if not self._api_key:
            return None
        try:
            await self._throttle()
            async with httpx.AsyncClient(
                timeout=_TIMEOUT_SECONDS,
                headers={"User-Agent": self._user_agent},
            ) as client:
                resp = await client.get(
                    self._api_url,
                    params={
                        "client": self._api_key,
                        "duration": str(fingerprint.duration_seconds),
                        "fingerprint": fingerprint.fingerprint,
                        "meta": "recordings releasegroups",
                        "format": "json",
                    },
                )
                resp.raise_for_status()
                payload = resp.json()
        except (httpx.HTTPError, ValueError):
            log.warning("acoustid_lookup_failed")
            return None

        try:
            return _parse_best_match(payload)
        except (AttributeError, TypeError, ValueError):
            # A malformed payload (e.g. a non-numeric score) must not surface
            # as an exception to callers.
            log.warning("acoustid_parse_failed")
            return None

    @classmethod
    async def _throttle(cls) -> None:
        """Sleep as needed so calls are at least ``_MIN_INTERVAL_SECONDS`` apart."""
        async with cls._throttle_lock:
            elapsed = time.monotonic() - cls._last_call_monotonic
            wait = _MIN_INTERVAL_SECONDS - elapsed
            if wait > 0:
                await asyncio.sleep(wait)
            # Stamp after sleeping so the next caller measures from the
            # moment this call is actually released.
            cls._last_call_monotonic = time.monotonic()
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_score(result: object) -> float:
    """Return ``result["score"]`` as a float, or ``0.0`` when it is unusable.

    Non-dict results, missing/null scores, non-numeric values and NaN all map
    to ``0.0`` so they can be ranked without raising.
    """
    if not isinstance(result, dict):
        return 0.0
    try:
        score = float(result.get("score", 0.0))
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN never equals itself and would make ``max`` depend on input order.
    return score if score == score else 0.0


def _parse_best_match(payload: object) -> RecordingMatch | None:
    """Extract the highest-scoring result from a decoded lookup response.

    Args:
        payload: The decoded JSON body; any type is tolerated.

    Returns:
        A :class:`RecordingMatch` built from the best result and its first
        recording (first artist, first release group), or ``None`` when the
        status is not ``"ok"``, there are no results, or the best result has
        no string ``id``. Recording fields that are absent stay ``None``.
    """
    if not isinstance(payload, dict) or payload.get("status") != "ok":
        return None
    results = payload.get("results")
    if not isinstance(results, list) or not results:
        return None

    # Rank explicitly instead of trusting server ordering; unusable scores
    # count as 0.0 rather than raising during the comparison.
    best = max(results, key=_coerce_score)
    if not isinstance(best, dict):
        return None

    acoustid = best.get("id")
    if not isinstance(acoustid, str):
        return None
    score = _coerce_score(best)

    recording_mbid: str | None = None
    title: str | None = None
    artist: str | None = None
    album: str | None = None

    recordings = best.get("recordings")
    if isinstance(recordings, list) and recordings and isinstance(recordings[0], dict):
        rec = recordings[0]
        recording_mbid = rec.get("id") if isinstance(rec.get("id"), str) else None
        title = rec.get("title") if isinstance(rec.get("title"), str) else None
        artists = rec.get("artists")
        if isinstance(artists, list) and artists and isinstance(artists[0], dict):
            name = artists[0].get("name")
            artist = name if isinstance(name, str) else None
        groups = rec.get("releasegroups")
        if isinstance(groups, list) and groups and isinstance(groups[0], dict):
            gtitle = groups[0].get("title")
            album = gtitle if isinstance(gtitle, str) else None

    return RecordingMatch(
        acoustid=acoustid,
        score=score,
        recording_mbid=recording_mbid,
        title=title,
        artist=artist,
        album=album,
        year=None,
    )
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
"""FpcalcFingerprinter — Chromaprint fingerprint via the ``fpcalc`` binary.
|
||||||
|
|
||||||
|
``fpcalc -json <file>`` emits ``{"duration": float, "fingerprint": str}``. The
|
||||||
|
binary ships in the Docker image (``libchromaprint-tools``). Any failure (binary
|
||||||
|
missing, bad file, timeout) degrades to ``None`` — the pipeline then falls back
|
||||||
|
to tag-only metadata (plan §6.2: one external dependency must never crash it).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
from app.domain.entities.metadata import Fingerprint
|
||||||
|
|
||||||
|
log = get_logger(__name__)
|
||||||
|
|
||||||
|
_TIMEOUT_SECONDS = 30
|
||||||
|
|
||||||
|
|
||||||
|
class FpcalcFingerprinter:
    """Implements :class:`app.domain.ports.AudioFingerprinter`.

    Runs ``<binary> -json <path>`` and parses its JSON output. Every failure
    mode (binary missing, spawn error, timeout, non-zero exit, malformed
    output) is logged and reported as ``None``.
    """

    def __init__(self, binary: str = "fpcalc") -> None:
        """Remember the executable name or path to invoke."""
        self._binary = binary

    def is_available(self) -> bool:
        """Return ``True`` when the binary can be resolved via ``shutil.which``."""
        return shutil.which(self._binary) is not None

    async def calculate(self, path: Path) -> Fingerprint | None:
        """Fingerprint the audio file at ``path``.

        Returns:
            A :class:`Fingerprint` with a non-empty fingerprint string and a
            positive whole-second duration, or ``None`` on any failure.
        """
        if not self.is_available():
            return None
        try:
            proc = await asyncio.create_subprocess_exec(
                self._binary,
                "-json",
                str(path),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
        except OSError:
            log.warning("fpcalc_failed", path=str(path))
            return None

        try:
            async with asyncio.timeout(_TIMEOUT_SECONDS):
                stdout, _stderr = await proc.communicate()
        except (TimeoutError, OSError):
            log.warning("fpcalc_failed", path=str(path))
            return None
        finally:
            # Timeout or cancellation only stops us waiting; without this the
            # child keeps running and is never reaped.
            if proc.returncode is None:
                await self._kill(proc)

        if proc.returncode != 0:
            log.warning("fpcalc_nonzero", path=str(path), returncode=proc.returncode)
            return None

        try:
            data = json.loads(stdout)
            fingerprint = data["fingerprint"]
            duration = round(float(data["duration"]))
        except (json.JSONDecodeError, KeyError, TypeError, ValueError, OverflowError):
            # TypeError: top-level JSON is not an object, or a field is null.
            # OverflowError: an infinite duration cannot be rounded.
            log.warning("fpcalc_bad_output", path=str(path))
            return None

        if not isinstance(fingerprint, str):
            # ``str(None)`` would yield the truthy text "None"; reject instead.
            log.warning("fpcalc_bad_output", path=str(path))
            return None
        if not fingerprint or duration <= 0:
            return None
        return Fingerprint(fingerprint=fingerprint, duration_seconds=duration)

    @staticmethod
    async def _kill(proc: asyncio.subprocess.Process) -> None:
        """Kill ``proc`` and wait for it so no zombie is left behind."""
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # exited between the returncode check and the signal
        await proc.wait()
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
"""MutagenTagReader — reads embedded tags from a local audio file.
|
||||||
|
|
||||||
|
The offline first pass of enrichment (plan §6.2): well-tagged files get correct
|
||||||
|
artist/album/title without any network call. mutagen's ``easy=True`` mode
|
||||||
|
normalises tag keys across ID3 / Vorbis / MP4, so one code path covers all the
|
||||||
|
formats the library accepts. Parsing is blocking, so it runs in a worker thread.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import anyio
|
||||||
|
from mutagen import File as MutagenFile # type: ignore[attr-defined]
|
||||||
|
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
from app.domain.entities.metadata import AudioTags
|
||||||
|
|
||||||
|
log = get_logger(__name__)
|
||||||
|
|
||||||
|
_YEAR_RE = re.compile(r"(\d{4})")
|
||||||
|
|
||||||
|
|
||||||
|
def _first(value: object) -> str | None:
    """Return the first non-empty string from a tag value, else ``None``.

    EasyXxx tags expose values as lists. Each element (or the bare value when
    it is not a list/tuple) is stringified and stripped; ``None`` entries and
    entries that are empty after stripping are skipped, so a leading blank
    element no longer hides a later usable one.
    """
    candidates = value if isinstance(value, (list, tuple)) else (value,)
    for candidate in candidates:
        if candidate is None:
            continue
        text = str(candidate).strip()
        if text:
            return text
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_year(value: object) -> int | None:
    """Return the first four-digit run in the tag value as an int.

    The value is normalised through ``_first``; ``None`` is returned when
    there is no usable text or it contains no four consecutive digits.
    """
    text = _first(value)
    found = _YEAR_RE.search(text) if text is not None else None
    if found is None:
        return None
    return int(found.group(1))
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_track_number(value: object) -> int | None:
    """Return the track number from a ``"3"`` or ``"3/12"`` style tag value.

    Only the part before the first ``/`` is considered. ``None`` is returned
    when there is no usable text or that part is not made of decimal digits.
    """
    text = _first(value)
    if text is None:
        return None
    head = text.partition("/")[0].strip()
    # ``isdecimal`` rather than ``isdigit``: the latter also accepts characters
    # such as "²" that ``int()`` rejects with ValueError.
    return int(head) if head.isdecimal() else None
|
||||||
|
|
||||||
|
|
||||||
|
class MutagenTagReader:
    """Implements :class:`app.domain.ports.AudioTagReader`."""

    async def read(self, path: Path) -> AudioTags | None:
        """Parse ``path`` in a worker thread; any exception yields ``None``."""
        try:
            parsed = await anyio.to_thread.run_sync(self._read_sync, path)
        except Exception:
            log.warning("tag_read_failed", path=str(path))
            return None
        return parsed

    def _read_sync(self, path: Path) -> AudioTags | None:
        """Blocking read of tags plus stream info via mutagen's easy mode.

        Returns ``None`` when mutagen does not recognise the container.
        """
        audio = MutagenFile(str(path), easy=True)
        if audio is None:
            return None  # unrecognised container

        tags = audio.tags or {}

        # Stream properties are optional: keep None unless a truthy value exists.
        info = getattr(audio, "info", None)
        length = getattr(info, "length", None) if info is not None else None
        duration = round(float(length)) if length else None
        raw_bitrate = getattr(info, "bitrate", None) if info is not None else None
        # bits/s → kbps for display
        bitrate = int(raw_bitrate) // 1000 if raw_bitrate else None

        return AudioTags(
            title=_first(tags.get("title")),
            artist=_first(tags.get("artist")),
            album=_first(tags.get("album")),
            album_artist=_first(tags.get("albumartist")),
            genre=_first(tags.get("genre")),
            year=_parse_year(tags.get("date") or tags.get("year")),
            track_number=_parse_track_number(tags.get("tracknumber")),
            duration_seconds=duration,
            bitrate=bitrate,
        )
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
"""arq worker settings — the queue runtime. Task functions register here.
|
"""arq worker settings — the queue runtime. Task functions register here.
|
||||||
|
|
||||||
Run with: ``arq app.workers.arq_worker.WorkerSettings``.
|
Run with: ``arq app.workers.arq_worker.WorkerSettings``.
|
||||||
Tasks (download, enrich, transcode) are appended to ``functions`` in later steps.
|
Tasks (download, transcode) are appended to ``functions`` in later steps.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Any, ClassVar
|
from typing import Any, ClassVar
|
||||||
@@ -10,6 +10,7 @@ from arq.connections import RedisSettings
|
|||||||
|
|
||||||
from app.core.config import get_settings
|
from app.core.config import get_settings
|
||||||
from app.core.logging import configure_logging, get_logger
|
from app.core.logging import configure_logging, get_logger
|
||||||
|
from app.workers.tasks.enrich_task import enrich_track
|
||||||
from app.workers.tasks.import_task import scan_local_folder
|
from app.workers.tasks.import_task import scan_local_folder
|
||||||
|
|
||||||
log = get_logger("worker")
|
log = get_logger("worker")
|
||||||
@@ -26,7 +27,7 @@ async def shutdown(_ctx: dict[str, Any]) -> None:
|
|||||||
|
|
||||||
|
|
||||||
class WorkerSettings:
|
class WorkerSettings:
|
||||||
functions: ClassVar[list[Any]] = [scan_local_folder]
|
functions: ClassVar[list[Any]] = [scan_local_folder, enrich_track]
|
||||||
on_startup = startup
|
on_startup = startup
|
||||||
on_shutdown = shutdown
|
on_shutdown = shutdown
|
||||||
max_jobs = get_settings().max_parallel_downloads
|
max_jobs = get_settings().max_parallel_downloads
|
||||||
|
|||||||
@@ -4,14 +4,18 @@ A short-lived pool per call keeps things simple (enqueues are rare, admin-driven
|
|||||||
actions). Redis being down degrades to a clean 503 rather than a crash
|
actions). Redis being down degrades to a clean 503 rather than a crash
|
||||||
(graceful degradation)."""
|
(graceful degradation)."""
|
||||||
|
|
||||||
|
import uuid
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from arq import create_pool
|
from arq import create_pool
|
||||||
from arq.connections import RedisSettings
|
from arq.connections import RedisSettings
|
||||||
|
|
||||||
from app.core.config import get_settings
|
from app.core.config import get_settings
|
||||||
|
from app.core.logging import get_logger
|
||||||
from app.domain.errors import DependencyUnavailableError
|
from app.domain.errors import DependencyUnavailableError
|
||||||
|
|
||||||
|
log = get_logger("worker.queue")
|
||||||
|
|
||||||
|
|
||||||
async def enqueue(function: str, **kwargs: Any) -> str:
|
async def enqueue(function: str, **kwargs: Any) -> str:
|
||||||
"""Enqueue ``function`` by name, returning the job id. Raises
|
"""Enqueue ``function`` by name, returning the job id. Raises
|
||||||
@@ -28,3 +32,18 @@ async def enqueue(function: str, **kwargs: Any) -> str:
|
|||||||
if job is None:
|
if job is None:
|
||||||
raise DependencyUnavailableError("Could not enqueue job.")
|
raise DependencyUnavailableError("Could not enqueue job.")
|
||||||
return str(job.job_id)
|
return str(job.job_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def enqueue_enrich(track_id: uuid.UUID) -> None:
    """Queue a deferred ``enrich_track`` job for an already-stored track.

    Enrichment is a follow-up, not a barrier: when ``enqueue`` raises
    ``DependencyUnavailableError`` the failure is logged and swallowed, leaving
    the track ``metadata_status=pending`` for a later re-enrich.

    The job is deferred (``_defer_by=5``) so the caller's DB transaction is
    committed before the worker looks the track up (the upload request commits
    only after it returns)."""
    track_ref = str(track_id)
    try:
        await enqueue("enrich_track", track_id=track_ref, _defer_by=5)
    except DependencyUnavailableError:
        log.warning("enrich_enqueue_failed", track_id=track_ref)
|
||||||
|
|||||||
@@ -0,0 +1,56 @@
|
|||||||
|
"""arq task: enrich one track's metadata (plan §6.2, §1D).
|
||||||
|
|
||||||
|
Wires the §6.2 pipeline adapters to :class:`MetadataEnrichmentService` and runs
|
||||||
|
it in the worker's own transactional session. Enqueued (deferred) after upload
|
||||||
|
and after a local-folder import. Idempotent and best-effort — a missing track or
|
||||||
|
a ``manual`` one is a clean no-op.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.application.metadata_service import MetadataEnrichmentService
|
||||||
|
from app.core.config import get_settings
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
from app.infrastructure.db import session_scope
|
||||||
|
from app.infrastructure.db.repositories import (
|
||||||
|
SqlAlchemyAlbumRepository,
|
||||||
|
SqlAlchemyArtistRepository,
|
||||||
|
SqlAlchemyTrackRepository,
|
||||||
|
)
|
||||||
|
from app.infrastructure.metadata.acoustid import AcoustIdHttpClient
|
||||||
|
from app.infrastructure.metadata.fingerprint import FpcalcFingerprinter
|
||||||
|
from app.infrastructure.metadata.tags import MutagenTagReader
|
||||||
|
from app.infrastructure.storage.provider import get_file_storage
|
||||||
|
|
||||||
|
log = get_logger("worker.enrich")
|
||||||
|
|
||||||
|
|
||||||
|
async def enrich_track(_ctx: dict[str, Any], *, track_id: str) -> dict[str, Any]:
    """arq entry point: run metadata enrichment for one track.

    Builds the AcoustID client from settings, wires the repositories and
    adapters into a :class:`MetadataEnrichmentService` inside one
    ``session_scope``, and returns a small summary of the outcome.

    Args:
        _ctx: arq job context (unused).
        track_id: Track UUID in string form.

    Returns:
        ``{"track_id", "status", "mbid"}`` taken from the service result.
    """
    settings = get_settings()
    secret = settings.acoustid_api_key
    acoustid = AcoustIdHttpClient(
        api_key=secret.get_secret_value() if secret else None,
        user_agent=settings.musicbrainz_user_agent,
        api_url=settings.acoustid_api_url,
    )

    async with session_scope() as session:
        service = MetadataEnrichmentService(
            tracks=SqlAlchemyTrackRepository(session),
            artists=SqlAlchemyArtistRepository(session),
            albums=SqlAlchemyAlbumRepository(session),
            storage=get_file_storage(),
            tag_reader=MutagenTagReader(),
            fingerprinter=FpcalcFingerprinter(settings.fpcalc_path),
            acoustid=acoustid,
        )
        result = await service.enrich(uuid.UUID(track_id))

    summary: dict[str, Any] = {
        "track_id": str(result.track_id),
        "status": result.status,
        "mbid": result.matched_mbid,
    }
    return summary
|
||||||
@@ -18,6 +18,7 @@ from app.infrastructure.db.repositories import (
|
|||||||
)
|
)
|
||||||
from app.infrastructure.sources.registry import build_source_registry
|
from app.infrastructure.sources.registry import build_source_registry
|
||||||
from app.infrastructure.storage.provider import get_file_storage
|
from app.infrastructure.storage.provider import get_file_storage
|
||||||
|
from app.workers.queue import enqueue_enrich
|
||||||
|
|
||||||
log = get_logger("worker.import")
|
log = get_logger("worker.import")
|
||||||
|
|
||||||
@@ -37,6 +38,11 @@ async def scan_local_folder(
|
|||||||
)
|
)
|
||||||
summary = await service.scan_and_import(backend, added_by=actor)
|
summary = await service.scan_and_import(backend, added_by=actor)
|
||||||
|
|
||||||
|
# Enqueue enrichment only after the import transaction has committed above,
|
||||||
|
# so the enrich worker is guaranteed to see the new rows.
|
||||||
|
for track_id in summary.imported_ids:
|
||||||
|
await enqueue_enrich(track_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"source": summary.source,
|
"source": summary.source,
|
||||||
"seen": summary.seen,
|
"seen": summary.seen,
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ dependencies = [
|
|||||||
"cryptography>=44.0",
|
"cryptography>=44.0",
|
||||||
# outbound http (ML client, MusicBrainz, AcoustID)
|
# outbound http (ML client, MusicBrainz, AcoustID)
|
||||||
"httpx>=0.28",
|
"httpx>=0.28",
|
||||||
|
# embedded audio tag reading (enrichment tag pre-pass)
|
||||||
|
"mutagen>=1.47",
|
||||||
# S3-compatible object storage
|
# S3-compatible object storage
|
||||||
"aioboto3>=13.0",
|
"aioboto3>=13.0",
|
||||||
# logging
|
# logging
|
||||||
|
|||||||
@@ -0,0 +1,75 @@
|
|||||||
|
"""Unit tests for the AcoustID response parser — pure, no network."""
|
||||||
|
|
||||||
|
from app.infrastructure.metadata.acoustid import _parse_best_match
|
||||||
|
|
||||||
|
|
||||||
|
def _payload_with_results(results: list[object]) -> dict[str, object]:
    """Wrap ``results`` in a successful (``status == "ok"``) response envelope."""
    envelope: dict[str, object] = {"status": "ok"}
    envelope["results"] = results
    return envelope
|
||||||
|
|
||||||
|
|
||||||
|
def test_parses_full_recording() -> None:
    """A result with one complete recording populates every match field."""
    recording = {
        "id": "mb-rec-1",
        "title": "One More Time",
        "artists": [{"id": "a1", "name": "Daft Punk"}],
        "releasegroups": [{"id": "rg1", "title": "Discovery"}],
    }
    result = {"id": "acoustid-1", "score": 0.97, "recordings": [recording]}

    match = _parse_best_match(_payload_with_results([result]))

    assert match is not None
    observed = (
        match.acoustid,
        match.recording_mbid,
        match.title,
        match.artist,
        match.album,
        match.score,
    )
    assert observed == (
        "acoustid-1",
        "mb-rec-1",
        "One More Time",
        "Daft Punk",
        "Discovery",
        0.97,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_picks_highest_score() -> None:
    """The higher-scoring result wins even when it is listed second."""
    weaker = {"id": "low", "score": 0.40, "recordings": [{"id": "r-low", "title": "Low"}]}
    stronger = {"id": "high", "score": 0.92, "recordings": [{"id": "r-high", "title": "High"}]}

    match = _parse_best_match(_payload_with_results([weaker, stronger]))

    assert match is not None
    assert (match.acoustid, match.title) == ("high", "High")
|
||||||
|
|
||||||
|
|
||||||
|
def test_result_without_recordings_still_returns_id() -> None:
    """A bare result yields a match carrying only the AcoustID id."""
    bare_result = {"id": "acoustid-only", "score": 0.5}

    match = _parse_best_match(_payload_with_results([bare_result]))

    assert match is not None
    assert match.acoustid == "acoustid-only"
    for missing in (match.recording_mbid, match.title):
        assert missing is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_error_status_returns_none() -> None:
    """A payload whose status is not ``ok`` produces no match."""
    error_payload = {"status": "error", "error": {"message": "bad"}}
    match = _parse_best_match(error_payload)
    assert match is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_results_returns_none() -> None:
    """An ``ok`` payload with an empty result list produces no match."""
    empty_payload = _payload_with_results([])
    match = _parse_best_match(empty_payload)
    assert match is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_dict_payload_returns_none() -> None:
    """Payloads that are not dicts are rejected rather than raising."""
    for bogus in ("nonsense", None):
        assert _parse_best_match(bogus) is None
|
||||||
@@ -0,0 +1,283 @@
|
|||||||
|
"""Unit tests for MetadataEnrichmentService — DB-free, in-memory fakes.
|
||||||
|
|
||||||
|
Covers the §6.2 orchestration contract: tag-first merge, AcoustID fallback,
|
||||||
|
artist/album resolution, status transitions, and the hard invariants
|
||||||
|
(``manual`` untouched, graceful degradation, idempotent gap-fill).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime as dt
|
||||||
|
import uuid
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from app.application.metadata_service import MetadataEnrichmentService
|
||||||
|
from app.domain.entities import Artist, Track
|
||||||
|
from app.domain.entities.album import Album
|
||||||
|
from app.domain.entities.metadata import AudioTags, Fingerprint, RecordingMatch
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.asyncio
|
||||||
|
|
||||||
|
_UNKNOWN = "Unknown Artist"
|
||||||
|
|
||||||
|
|
||||||
|
def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Track:
    """Build a freshly uploaded, un-enriched ``Track`` with random ids."""
    stamp = dt.datetime.now(dt.UTC)
    placeholder_artist_id = uuid.uuid4()  # the "Unknown Artist" id
    return Track(
        id=uuid.uuid4(),
        title=title,
        artist_id=placeholder_artist_id,
        album_id=None,
        storage_uri="tracks/aa/song.mp3",
        file_format="mp3",
        file_size=123,
        source="upload",
        source_id="deadbeef",
        duration_seconds=None,
        genre=None,
        year=None,
        metadata_status=metadata_status,
        created_at=stamp,
        updated_at=stamp,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class FakeTrackRepo:
    """In-memory track repository holding at most one track.

    ``applied`` records the keyword arguments of the latest
    ``apply_enrichment`` call, or stays ``None`` if it was never called.
    """

    def __init__(self, track: Track | None) -> None:
        self.applied: dict[str, object] | None = None
        self._track = track

    async def get_by_id(self, track_id: uuid.UUID) -> Track | None:
        """Return the stored track regardless of ``track_id``."""
        return self._track

    async def apply_enrichment(self, track_id: uuid.UUID, **kw: object) -> Track:
        """Capture the written fields and hand back the stored track."""
        self.applied = kw
        stored = self._track
        return stored  # type: ignore[return-value]
|
||||||
|
|
||||||
|
|
||||||
|
class FakeArtistRepo:
    """Artist repository stub that records every requested name in ``created``."""

    def __init__(self) -> None:
        self.created: list[str] = []

    async def get_or_create(self, name: str) -> Artist:
        """Log ``name`` and return a brand-new ``Artist`` carrying it."""
        self.created.append(name)
        stamp = dt.datetime.now(dt.UTC)
        return Artist(id=uuid.uuid4(), name=name, created_at=stamp, updated_at=stamp)
|
||||||
|
|
||||||
|
|
||||||
|
class FakeAlbumRepo:
    """Album repository stub recording ``(title, artist_id)`` per request."""

    def __init__(self) -> None:
        self.created: list[tuple[str, uuid.UUID]] = []

    async def get_or_create(
        self, *, title: str, artist_id: uuid.UUID, year: int | None, musicbrainz_id: str | None
    ) -> Album:
        """Log the request and return a brand-new ``Album`` echoing the inputs."""
        self.created.append((title, artist_id))
        stamp = dt.datetime.now(dt.UTC)
        album = Album(
            id=uuid.uuid4(),
            title=title,
            artist_id=artist_id,
            year=year,
            cover_path=None,
            musicbrainz_id=musicbrainz_id,
            created_at=stamp,
            updated_at=stamp,
        )
        return album
|
||||||
|
|
||||||
|
|
||||||
|
class FakeStorage:
    """Storage stub mapping every key to a path under ``/tmp``; no file I/O."""

    @asynccontextmanager
    async def as_local_path(self, key: str) -> AsyncIterator[Path]:
        """Yield ``/tmp/<key>`` without touching the filesystem."""
        local = Path("/tmp", key)
        yield local
|
||||||
|
|
||||||
|
|
||||||
|
class FakeTagReader:
    """Tag reader stub that always returns the tags given at construction."""

    def __init__(self, tags: AudioTags | None) -> None:
        self._tags = tags

    async def read(self, path: Path) -> AudioTags | None:
        """Ignore ``path`` and return the canned tags."""
        canned = self._tags
        return canned
|
||||||
|
|
||||||
|
|
||||||
|
class FakeFingerprinter:
    """Fingerprinter stub with a canned result and a switchable availability."""

    def __init__(self, fp: Fingerprint | None, *, available: bool = True) -> None:
        self._available = available
        self._fp = fp

    def is_available(self) -> bool:
        """Report the availability flag given at construction."""
        return self._available

    async def calculate(self, path: Path) -> Fingerprint | None:
        """Ignore ``path`` and return the canned fingerprint."""
        canned = self._fp
        return canned
|
||||||
|
|
||||||
|
|
||||||
|
class FakeAcoustId:
    """AcoustID client stub: canned match, availability flag, call counter."""

    def __init__(self, match: RecordingMatch | None, *, available: bool = True) -> None:
        self.calls = 0
        self._available = available
        self._match = match

    def is_available(self) -> bool:
        """Report the availability flag given at construction."""
        return self._available

    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
        """Count the call, then return the canned match."""
        self.calls = self.calls + 1
        return self._match
|
||||||
|
|
||||||
|
|
||||||
|
def _service(
    *,
    track: Track | None,
    tags: AudioTags | None = None,
    fp: Fingerprint | None = None,
    match: RecordingMatch | None = None,
    fp_available: bool = True,
    acoustid_available: bool = True,
) -> tuple[MetadataEnrichmentService, FakeTrackRepo, FakeArtistRepo, FakeAlbumRepo, FakeAcoustId]:
    """Assemble the service over in-memory fakes.

    Returns the service together with the fakes a test inspects afterwards:
    track repo, artist repo, album repo and the AcoustID client.
    """
    track_repo = FakeTrackRepo(track)
    artist_repo = FakeArtistRepo()
    album_repo = FakeAlbumRepo()
    acoustid_client = FakeAcoustId(match, available=acoustid_available)
    fingerprinter = FakeFingerprinter(fp, available=fp_available)
    service = MetadataEnrichmentService(
        tracks=track_repo,  # type: ignore[arg-type]
        artists=artist_repo,  # type: ignore[arg-type]
        albums=album_repo,  # type: ignore[arg-type]
        storage=FakeStorage(),  # type: ignore[arg-type]
        tag_reader=FakeTagReader(tags),  # type: ignore[arg-type]
        fingerprinter=fingerprinter,  # type: ignore[arg-type]
        acoustid=acoustid_client,  # type: ignore[arg-type]
    )
    return service, track_repo, artist_repo, album_repo, acoustid_client
|
||||||
|
|
||||||
|
|
||||||
|
async def test_tags_only_enriches_and_relinks_artist_and_album() -> None:
    """Embedded tags alone enrich the track and resolve artist and album."""
    track = _track()
    embedded = AudioTags(
        title="Real Title",
        artist="Pink Floyd",
        album="The Wall",
        genre="Rock",
        year=1979,
        track_number=1,
        duration_seconds=222,
    )
    service, tracks, artists, albums, acoustid = _service(track=track, tags=embedded)

    outcome = await service.enrich(track.id)

    assert outcome.status == "enriched"
    assert acoustid.calls == 0  # no fingerprint → no lookup needed
    assert "Pink Floyd" in artists.created
    assert albums.created and albums.created[0][0] == "The Wall"

    written = tracks.applied
    assert written is not None
    expected_fields: dict[str, object] = {
        "title": "Real Title",
        "genre": "Rock",
        "year": 1979,
        "track_number": 1,
        "duration_seconds": 222,
        "metadata_status": "enriched",
    }
    for field, value in expected_fields.items():
        assert written[field] == value
|
||||||
|
|
||||||
|
|
||||||
|
async def test_manual_track_is_never_touched() -> None:
    """A ``manual`` track is skipped and nothing is written to it."""
    manual_track = _track(metadata_status="manual")
    service, tracks, *_unused = _service(track=manual_track, tags=AudioTags(artist="X"))

    outcome = await service.enrich(manual_track.id)

    assert outcome.status == "skipped"
    assert tracks.applied is None  # nothing written
|
||||||
|
|
||||||
|
|
||||||
|
async def test_missing_track_is_a_clean_noop() -> None:
    """Enriching an id with no stored track is skipped without any write."""
    service, tracks, *_unused = _service(track=None)
    unknown_id = uuid.uuid4()

    outcome = await service.enrich(unknown_id)

    assert outcome.status == "skipped"
    assert tracks.applied is None
|
||||||
|
|
||||||
|
|
||||||
|
async def test_nothing_found_marks_failed() -> None:
    """With neither tags nor a fingerprint the track is marked ``failed``."""
    track = _track()
    # No tags, no fingerprint → no identity at all.
    service, tracks, artists, albums, _acoustid = _service(track=track, tags=None, fp=None)

    outcome = await service.enrich(track.id)

    assert outcome.status == "failed"
    assert artists.created == []  # artist stays the original unknown
    assert albums.created == []

    written = tracks.applied
    assert written is not None
    assert written["artist_id"] == track.artist_id  # fallback kept
    assert written["metadata_status"] == "failed"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_acoustid_path_fills_when_tags_absent() -> None:
    """Without tags, the AcoustID match supplies title, artist and ids."""
    track = _track()
    fingerprint = Fingerprint(fingerprint="AQAAxyz", duration_seconds=200)
    identified = RecordingMatch(
        acoustid="acoustid-uuid",
        score=0.95,
        recording_mbid="mb-recording-id",
        title="Identified Title",
        artist="Daft Punk",
        album="Discovery",
    )
    service, tracks, artists, _albums, acoustid = _service(
        track=track, tags=None, fp=fingerprint, match=identified
    )

    outcome = await service.enrich(track.id)

    assert outcome.status == "enriched"
    assert outcome.matched_mbid == "mb-recording-id"
    assert acoustid.calls == 1

    written = tracks.applied
    assert written is not None
    assert written["title"] == "Identified Title"
    assert written["musicbrainz_id"] == "mb-recording-id"
    assert written["acoustid_fingerprint"] == "acoustid-uuid"
    assert "Daft Punk" in artists.created
|
||||||
|
|
||||||
|
|
||||||
|
async def test_tags_win_over_acoustid_for_overlapping_fields() -> None:
    """Tag values beat AcoustID values, but the AcoustID MBID is still kept."""
    track = _track()
    fingerprint = Fingerprint(fingerprint="AQAA", duration_seconds=200)
    embedded = AudioTags(title="Tagged Title", artist="Tagged Artist")
    identified = RecordingMatch(
        acoustid="aid",
        score=0.9,
        recording_mbid="mbid",
        title="AcoustID Title",
        artist="AcoustID Artist",
    )
    service, tracks, artists, _albums, _acoustid = _service(
        track=track, tags=embedded, fp=fingerprint, match=identified
    )

    await service.enrich(track.id)

    written = tracks.applied
    assert written is not None
    assert written["title"] == "Tagged Title"  # tag preferred
    assert "Tagged Artist" in artists.created
    # but the MBID from AcoustID is still captured
    assert written["musicbrainz_id"] == "mbid"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_fingerprint_skipped_when_acoustid_unavailable() -> None:
    """An unavailable AcoustID client is never called; tags still enrich."""
    track = _track()
    fingerprint = Fingerprint(fingerprint="AQAA", duration_seconds=200)
    service, _tracks, _artists, _albums, acoustid = _service(
        track=track,
        tags=AudioTags(artist="Tagged"),
        fp=fingerprint,
        acoustid_available=False,
    )

    outcome = await service.enrich(track.id)

    # tags still enrich, but no AcoustID call is attempted
    assert acoustid.calls == 0
    assert outcome.status == "enriched"
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
"""Unit tests for the mutagen tag-parsing helpers — pure, no files."""
|
||||||
|
|
||||||
|
from app.infrastructure.metadata.tags import _first, _parse_track_number, _parse_year
|
||||||
|
|
||||||
|
|
||||||
|
def test_first_takes_head_of_list() -> None:
    """``_first`` unwraps lists, passes plain strings, and maps blanks to None."""
    for raw, expected in (
        (["Pink Floyd", "other"], "Pink Floyd"),
        ("Solo", "Solo"),
    ):
        assert _first(raw) == expected

    # empty list, missing value, whitespace-only → None
    for blank in ([], None, ["   "]):
        assert _first(blank) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_year_extracts_four_digits() -> None:
    """``_parse_year`` pulls the year out of bare years and ISO-like dates."""
    for raw, expected in (
        (["1979"], 1979),
        (["1979-01-02"], 1979),
        ("2021-12", 2021),
    ):
        assert _parse_year(raw) == expected

    for unusable in (["no year"], None):
        assert _parse_year(unusable) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_track_number_handles_slash_form() -> None:
    """``_parse_track_number`` accepts ``N`` and ``N/total``, rejects the rest."""
    for raw, expected in (
        (["3/12"], 3),
        (["7"], 7),
        ("1/10", 1),
    ):
        assert _parse_track_number(raw) == expected

    for unusable in (["A1"], None):
        assert _parse_track_number(unusable) is None
|
||||||
Reference in New Issue
Block a user