feat(enrichment): tag-first metadata pipeline (§1D)
Implements the §6.2 enrichment pipeline: embedded tags → Chromaprint
fingerprint → AcoustID lookup. Well-tagged files get correct
artist/album/title offline; the rest are identified via AcoustID
(which also yields a MusicBrainz recording id in one call).
- domain: AudioTags/Fingerprint/RecordingMatch value objects; ports
AudioTagReader, AudioFingerprinter, AcoustIdClient;
TrackRepository.apply_enrichment (gap-fill, never erases) + AlbumRepository.get_or_create
- infrastructure/metadata: MutagenTagReader, FpcalcFingerprinter,
AcoustIdHttpClient (rich meta=recordings+releasegroups, throttled)
- application: MetadataEnrichmentService — tags preferred, AcoustID fills
gaps; resolves artist/album; status enriched/failed; skips manual;
every external step wrapped (graceful degradation)
- workers: enrich_task registered; enqueue_enrich is best-effort and
deferred so the caller's txn commits before the worker reads the row
- wiring: upload enqueues after add; import returns imported_ids and
enqueues post-commit (mid-scan would race the worker); manual
POST /tracks/{id}/metadata/enrich endpoint
- deps: add mutagen (fpcalc/ffmpeg already in the image)
Tests: metadata service orchestration, AcoustID parser, tag helpers.
125 passed; mypy strict + ruff clean.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
"""FpcalcFingerprinter — Chromaprint fingerprint via the ``fpcalc`` binary.
|
||||
|
||||
``fpcalc -json <file>`` emits ``{"duration": float, "fingerprint": str}``. The
|
||||
binary ships in the Docker image (``libchromaprint-tools``). Any failure (binary
|
||||
missing, bad file, timeout) degrades to ``None`` — the pipeline then falls back
|
||||
to tag-only metadata (plan §6.2: one external dependency must never crash it).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.domain.entities.metadata import Fingerprint
|
||||
|
||||
# Module-level logger; the calls below pass an event name plus keyword fields.
log = get_logger(__name__)
|
||||
|
||||
# Upper bound, in seconds, on waiting for fpcalc to finish and hand back its
# output; exceeding it is treated as a fingerprinting failure.
_TIMEOUT_SECONDS = 30
|
||||
|
||||
|
||||
class FpcalcFingerprinter:
    """Implements :class:`app.domain.ports.AudioFingerprinter`.

    Runs ``<binary> -json <file>`` as a subprocess and parses the JSON it
    prints. Every failure mode (binary missing, spawn error, timeout, non-zero
    exit, unparsable or empty output) is reported as ``None`` rather than an
    exception.
    """

    def __init__(self, binary: str = "fpcalc") -> None:
        """Remember which executable name (or path) to invoke."""
        self._binary = binary

    def is_available(self) -> bool:
        """Return ``True`` if the configured binary can be found on ``PATH``."""
        return shutil.which(self._binary) is not None

    @staticmethod
    async def _reap(proc: asyncio.subprocess.Process) -> None:
        """Kill *proc* if it is still running and wait for it to exit."""
        if proc.returncode is not None:
            return
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited between the returncode check and the kill.
            pass
        await proc.wait()

    async def calculate(self, path: Path) -> Fingerprint | None:
        """Fingerprint the audio file at *path*.

        Returns a ``Fingerprint`` built from the ``fingerprint`` and
        ``duration`` fields of the tool's JSON output (duration rounded to
        whole seconds), or ``None`` when the binary is unavailable, cannot be
        started, exceeds ``_TIMEOUT_SECONDS``, exits non-zero, or emits output
        that is malformed, empty, or has a non-positive duration.
        """
        if not self.is_available():
            return None

        try:
            proc = await asyncio.create_subprocess_exec(
                self._binary,
                "-json",
                str(path),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
        except OSError:
            log.warning("fpcalc_failed", path=str(path))
            return None

        try:
            async with asyncio.timeout(_TIMEOUT_SECONDS):
                stdout, _stderr = await proc.communicate()
        except (TimeoutError, OSError):
            log.warning("fpcalc_failed", path=str(path))
            return None
        finally:
            # On timeout (or cancellation of the calling task) communicate()
            # is abandoned but the child keeps running; kill and reap it so
            # stuck fpcalc processes do not accumulate. No-op once it exited.
            await self._reap(proc)

        if proc.returncode != 0:
            log.warning("fpcalc_nonzero", path=str(path), returncode=proc.returncode)
            return None

        try:
            data = json.loads(stdout)
            fingerprint = str(data["fingerprint"])
            duration = round(float(data["duration"]))
        except (json.JSONDecodeError, KeyError, TypeError, ValueError, OverflowError):
            # TypeError: payload is not a JSON object, or a field is null.
            # OverflowError: duration parsed as infinity and cannot be rounded.
            log.warning("fpcalc_bad_output", path=str(path))
            return None

        if not fingerprint or duration <= 0:
            return None
        return Fingerprint(fingerprint=fingerprint, duration_seconds=duration)
|
||||
Reference in New Issue
Block a user