78007461e1
Pluggable fetch source: ytmusicapi search + yt-dlp download (cookies-file guard), DownloadJob entity/repo + DownloadService, download_task worker with exponential-backoff retries, and wired /search, /sources/{source}/search, and /downloads endpoints. Adds youtube_enabled/cookies config, yt-dlp+ytmusicapi deps, and the download_jobs.track_id migration. Snapshot also bundles in-progress storage/tracks/acoustid edits.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
162 lines
5.5 KiB
Python
162 lines
5.5 KiB
Python
"""AcoustIdHttpClient — identifies a recording from its Chromaprint fingerprint.

One ``/v2/lookup`` call with ``meta=recordings+releasegroups`` returns the
AcoustID id, the MusicBrainz recording id, and canonical title/artist/album —
metadata that itself originates from MusicBrainz, so a separate MB call is not
needed for Phase 1 (plan §6.2 steps 2-3 collapsed into one request).

Graceful degradation: no API key → ``is_available()`` is False and the whole
fingerprint path is skipped; any network/parse error → ``lookup`` returns
``None`` (and ``lookup_all`` returns ``[]``). A small inter-call delay keeps us
within AcoustID's rate limit.
"""
|
|
|
|
import asyncio
|
|
import time
|
|
|
|
import httpx
|
|
|
|
from app.core.logging import get_logger
|
|
from app.domain.entities.metadata import Fingerprint, RecordingMatch
|
|
|
|
# Module-level logger; the client uses it to report failed lookups.
log = get_logger(__name__)
|
|
|
|
# AcoustID lookup endpoint used when the caller does not pass ``api_url``.
_DEFAULT_URL = "https://api.acoustid.org/v2/lookup"
# Timeout handed to ``httpx.AsyncClient`` for each lookup.
_TIMEOUT_SECONDS = 10.0
# Minimum spacing ``_throttle`` enforces between consecutive lookups.
_MIN_INTERVAL_SECONDS = 0.34  # AcoustID allows ~3 req/s; stay polite


class AcoustIdHttpClient:
    """Implements :class:`app.domain.ports.AcoustIdClient`.

    Sends a fingerprint to the AcoustID lookup endpoint and hands the decoded
    response to the module-level parsers. Without an API key the client reports
    itself unavailable and every lookup returns its empty value without
    touching the network.

    The throttle state is stored on the class, so all instances in the process
    share one rate limit.
    """

    # Guards the read-sleep-write sequence in ``_throttle``.
    _throttle_lock = asyncio.Lock()
    # ``time.monotonic()`` reading recorded when the last lookup was let through.
    _last_call_monotonic = 0.0

    def __init__(
        self,
        *,
        api_key: str | None,
        user_agent: str,
        api_url: str = _DEFAULT_URL,
    ) -> None:
        """Store the connection settings; no I/O happens here.

        ``api_key`` is sent as the ``client`` query parameter, ``user_agent``
        as the ``User-Agent`` header, and ``api_url`` is the endpoint to GET.
        """
        self._api_key = api_key
        self._user_agent = user_agent
        self._api_url = api_url

    def is_available(self) -> bool:
        """Return ``True`` only when a non-empty API key was configured."""
        return bool(self._api_key)

    async def lookup(self, fingerprint: Fingerprint) -> RecordingMatch | None:
        """Return the best match for *fingerprint*, or ``None`` if the request
        failed, no key is configured, or nothing usable came back."""
        payload = await self._lookup_raw(fingerprint)
        if payload is None:
            return None
        return _parse_best_match(payload)

    async def lookup_all(self, fingerprint: Fingerprint) -> list[RecordingMatch]:
        """Return every parsed candidate for *fingerprint*; ``[]`` on failure
        or when no key is configured."""
        payload = await self._lookup_raw(fingerprint)
        if payload is None:
            return []
        return _parse_matches(payload)

    async def _lookup_raw(self, fingerprint: Fingerprint) -> object | None:
        """Perform one throttled lookup and return the decoded JSON body.

        Returns ``None`` when no API key is set, when httpx raises any
        ``HTTPError`` (including the one from ``raise_for_status``), or when
        the body cannot be decoded as JSON.
        """
        if not self._api_key:
            return None
        try:
            await self._throttle()
            async with httpx.AsyncClient(
                timeout=_TIMEOUT_SECONDS,
                headers={"User-Agent": self._user_agent},
            ) as client:
                resp = await client.get(
                    self._api_url,
                    params={
                        "client": self._api_key,
                        # NOTE(review): sent verbatim; AcoustID presumably
                        # expects whole seconds — confirm ``duration_seconds``
                        # is an int.
                        "duration": str(fingerprint.duration_seconds),
                        "fingerprint": fingerprint.fingerprint,
                        # Space-separated list of metadata sections to include.
                        "meta": "recordings releasegroups",
                        "format": "json",
                    },
                )
                resp.raise_for_status()
                return resp.json()  # type: ignore[no-any-return]
        except (httpx.HTTPError, ValueError):
            # Parenthesised tuple on purpose: the bare ``except A, B:`` spelling
            # only parses on Python 3.14+ (PEP 758); this form is equivalent and
            # parses on every Python 3.
            # NOTE(review): no exception detail is logged — presumably
            # deliberate, since the request URL carries the API key as a query
            # parameter; confirm before attaching ``exc_info``.
            log.warning("acoustid_lookup_failed")
            return None

    @classmethod
    async def _throttle(cls) -> None:
        """Wait until at least ``_MIN_INTERVAL_SECONDS`` have passed since the
        previous lookup, then record the current time.

        The lock is held across the sleep so concurrent callers are released
        one at a time rather than all at once.
        """
        async with cls._throttle_lock:
            elapsed = time.monotonic() - cls._last_call_monotonic
            wait = _MIN_INTERVAL_SECONDS - elapsed
            if wait > 0:
                await asyncio.sleep(wait)
            cls._last_call_monotonic = time.monotonic()
|
|
|
|
|
|
# Upper bound on how many top-scoring results ``_parse_matches`` tries to parse.
_MAX_MATCHES = 5
|
|
|
|
|
|
def _parse_best_match(payload: object) -> RecordingMatch | None:
    """Return the top-ranked match parsed from *payload*, or ``None`` when
    ``_parse_matches`` yields nothing."""
    ranked = _parse_matches(payload)
    if not ranked:
        return None
    return ranked[0]
|
|
|
|
|
|
def _parse_matches(payload: object, *, limit: int | None = None) -> list[RecordingMatch]:
    """Turn a decoded lookup response into recording matches, best score first.

    *payload* must be a dict with ``status == "ok"`` and a non-empty
    ``results`` list; anything else yields ``[]``. Non-dict entries in
    ``results`` are ignored.

    *limit* caps how many top-scoring results are handed to ``_parse_one``; it
    defaults to ``_MAX_MATCHES`` and negative values are treated as 0. Results
    that ``_parse_one`` rejects are dropped after the cap is applied, so the
    returned list may be shorter than *limit*.
    """
    if not isinstance(payload, dict) or payload.get("status") != "ok":
        return []
    results = payload.get("results")
    if not isinstance(results, list) or not results:
        return []

    cap = _MAX_MATCHES if limit is None else max(limit, 0)

    # Results are returned best-score-first, but sort defensively and cap the
    # number of candidates surfaced to the editor. The key goes through
    # ``_coerce_score`` so a missing, null or non-numeric score ranks as 0.0
    # instead of raising ``TypeError`` from inside ``sort``.
    candidates = [r for r in results if isinstance(r, dict)]
    candidates.sort(key=lambda r: _coerce_score(r.get("score")), reverse=True)

    parsed = (_parse_one(r) for r in candidates[:cap])
    return [m for m in parsed if m is not None]


def _coerce_score(raw: object) -> float:
    """Return *raw* as a float score; anything unusable becomes ``0.0``."""
    try:
        score = float(raw)  # type: ignore[arg-type]
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN compares false against everything and would scramble the ranking.
    return 0.0 if score != score else score


def _str_or_none(value: object) -> str | None:
    """Return *value* unchanged when it is a ``str``, otherwise ``None``."""
    return value if isinstance(value, str) else None


def _first_dict(value: object) -> dict[str, object] | None:
    """Return ``value[0]`` when *value* is a non-empty list whose first item is
    a dict, otherwise ``None``."""
    if isinstance(value, list) and value and isinstance(value[0], dict):
        return value[0]
    return None


def _parse_one(result: dict[str, object]) -> RecordingMatch | None:
    """Build a ``RecordingMatch`` from one entry of the ``results`` list.

    Returns ``None`` when the entry has no string ``id``. Only the first
    recording, its first artist and its first release group are consulted;
    every field that is missing or not a string is left as ``None``. A missing
    or non-numeric ``score`` is stored as ``0.0`` rather than raising. ``year``
    is always ``None``.
    """
    acoustid = result.get("id")
    if not isinstance(acoustid, str):
        return None

    recording_mbid: str | None = None
    release_group_mbid: str | None = None
    title: str | None = None
    artist: str | None = None
    album: str | None = None

    rec = _first_dict(result.get("recordings"))
    if rec is not None:
        recording_mbid = _str_or_none(rec.get("id"))
        title = _str_or_none(rec.get("title"))

        lead_artist = _first_dict(rec.get("artists"))
        if lead_artist is not None:
            artist = _str_or_none(lead_artist.get("name"))

        group = _first_dict(rec.get("releasegroups"))
        if group is not None:
            album = _str_or_none(group.get("title"))
            release_group_mbid = _str_or_none(group.get("id"))

    return RecordingMatch(
        acoustid=acoustid,
        score=_coerce_score(result.get("score")),
        recording_mbid=recording_mbid,
        release_group_mbid=release_group_mbid,
        title=title,
        artist=artist,
        album=album,
        year=None,
    )
|