feat(library): lazy materialization foundation for remote tracks (§Phase1)
Docker Build & Publish / build (push) Successful in 1m10s
Docker Build & Publish / push (push) Failing after 7s
Docker Build & Publish / Prune old image versions (push) Has been skipped

Adds nullable storage fields + availability column on tracks, remote
source/source_id identity on albums/artists, TrackRepository.materialize()
and get_or_create_remote() repos — groundwork for on-demand YTM library
(placeholders saved without audio, materialized in-place on first play).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 17:51:43 +03:00
parent 78007461e1
commit 58b98ab5ed
24 changed files with 492 additions and 31 deletions
+1 -1
View File
@@ -65,7 +65,7 @@ async def download(
if track is None:
raise NotFoundError("Song not found.")
result = await service.open_stream(track_id, None)
filename = f"{track.title}.{track.file_format}"
filename = f"{track.title}.{track.file_format or 'bin'}"
headers = {
"Content-Length": str(result.content_length),
"Content-Disposition": f'attachment; filename="{filename}"',
+2 -2
View File
@@ -80,8 +80,8 @@ def song_dict(
"albumId": encode_album(track.album_id) if track.album_id is not None else None,
"artistId": encode_artist(track.artist_id),
"coverArt": cover,
"size": track.file_size,
"contentType": content_type_for(track.file_format),
"size": track.file_size or 0,
"contentType": content_type_for(track.file_format or ""),
"suffix": track.file_format,
"duration": track.duration_seconds,
"year": track.year,
+3 -2
View File
@@ -14,14 +14,15 @@ class TrackOut(BaseModel):
album_id: uuid.UUID | None
album_title: str | None
duration_seconds: int | None
file_format: str
file_size: int
file_format: str | None
file_size: int | None
genre: str | None
year: int | None
track_number: int | None
metadata_status: str
metadata_error: str | None
enriched_at: dt.datetime | None
availability: str
source: str
has_cover: bool
created_at: dt.datetime
+3 -1
View File
@@ -54,6 +54,7 @@ async def _build_track_out(
metadata_status=t.metadata_status,
metadata_error=t.metadata_error,
enriched_at=t.enriched_at,
availability=t.availability,
source=t.source,
has_cover=bool(t.album_id and albums.get(t.album_id) and albums[t.album_id].cover_path),
created_at=t.created_at,
@@ -155,7 +156,8 @@ async def delete_track(
if track is None:
raise NotFoundError(f"Track {track_id} not found.")
await track_repo.delete(track_id)
await storage.delete(track.storage_uri)
if track.storage_uri is not None:
await storage.delete(track.storage_uri)
return Response(status_code=204)
+9 -3
View File
@@ -79,9 +79,13 @@ class MetadataEnrichmentService:
if track.metadata_status == "manual":
log.info("enrich_skip_manual", track_id=str(track_id))
return EnrichmentResult(track_id=track_id, status="skipped")
storage_uri = track.storage_uri
if storage_uri is None:
log.info("enrich_skip_remote", track_id=str(track_id))
return EnrichmentResult(track_id=track_id, status="skipped")
tags = await self._read_local(track.storage_uri)
match = await self._identify(track.storage_uri)
tags = await self._read_local(storage_uri)
match = await self._identify(storage_uri)
# Merge order is tag-first by default — embedded tags fix the common
# well-tagged offline case. But a *high-confidence* AcoustID match is the
@@ -125,7 +129,7 @@ class MetadataEnrichmentService:
if album is not None:
await self._resolve_cover(
album,
storage_uri=track.storage_uri,
storage_uri=storage_uri,
release_group_mbid=match.release_group_mbid if match else None,
)
@@ -175,6 +179,8 @@ class MetadataEnrichmentService:
return []
if not self._acoustid.is_available() or not self._fingerprinter.is_available():
return []
if track.storage_uri is None:
return []
try:
async with self._storage.as_local_path(track.storage_uri) as path:
fingerprint = await self._fingerprinter.calculate(path)
+6 -3
View File
@@ -72,16 +72,19 @@ class StreamingService:
track = await self._tracks.get_by_id(track_id)
if track is None:
raise NotFoundError("Track not found.")
storage_uri = track.storage_uri
if storage_uri is None:
raise NotFoundError("Track is not yet downloaded.")
stat = await self._storage.stat(track.storage_uri)
stat = await self._storage.stat(storage_uri)
total_size = stat.size
content_type = stat.content_type or _FORMAT_CONTENT_TYPE.get(
track.file_format.lower(), "application/octet-stream"
(track.file_format or "").lower(), "application/octet-stream"
)
start, end, is_partial = _parse_range(range_header, total_size)
stream, _ = await self._storage.open_range(track.storage_uri, start, end)
stream, _ = await self._storage.open_range(storage_uri, start, end)
actual_end = end if end is not None else total_size - 1
content_length = actual_end - start + 1
+2
View File
@@ -13,5 +13,7 @@ class Album:
year: int | None
cover_path: str | None
musicbrainz_id: str | None
source: str | None
source_id: str | None
created_at: dt.datetime
updated_at: dt.datetime
+6 -3
View File
@@ -9,6 +9,8 @@ from dataclasses import dataclass
class Artist:
id: uuid.UUID
name: str
source: str | None
source_id: str | None
created_at: dt.datetime
updated_at: dt.datetime
@@ -19,9 +21,9 @@ class Track:
title: str
artist_id: uuid.UUID
album_id: uuid.UUID | None
storage_uri: str
file_format: str
file_size: int
storage_uri: str | None
file_format: str | None
file_size: int | None
source: str
source_id: str
duration_seconds: int | None
@@ -31,5 +33,6 @@ class Track:
metadata_status: str
metadata_error: str | None
enriched_at: dt.datetime | None
availability: str
created_at: dt.datetime
updated_at: dt.datetime
+36 -3
View File
@@ -114,6 +114,11 @@ class FileStorage(Protocol):
class ArtistRepository(Protocol):
async def get_or_create(self, name: str) -> Artist: ...
async def get_or_create_remote(self, *, name: str, source: str, source_id: str) -> Artist:
"""Resolve/create an artist bound to a remote ``(source, source_id)``
(lazy materialization save-to-library)."""
...
async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None: ...
async def get_many(self, ids: list[uuid.UUID]) -> list[Artist]: ...
async def list(self, *, q: str | None, limit: int, offset: int) -> list[Artist]: ...
@@ -131,14 +136,28 @@ class TrackRepository(Protocol):
id: uuid.UUID,
title: str,
artist_id: uuid.UUID,
storage_uri: str,
file_format: str,
file_size: int,
storage_uri: str | None,
file_format: str | None,
file_size: int | None,
source: str,
source_id: str,
metadata_status: str,
added_by: uuid.UUID | None,
availability: str = ...,
) -> Track: ...
async def materialize(
self,
track_id: uuid.UUID,
*,
storage_uri: str,
file_format: str,
file_size: int,
bitrate: int | None,
) -> Track:
"""Fill in a remote placeholder's audio fields after a download
(lazy materialization), flipping ``availability`` to ``local``."""
...
async def delete(self, track_id: uuid.UUID) -> None: ...
# genres / library_stats must come before ``list`` — the method named
# ``list`` shadows the builtin in later annotations (same pattern as
@@ -212,6 +231,20 @@ class AlbumRepository(Protocol):
year: int | None,
musicbrainz_id: str | None,
) -> Album: ...
async def get_or_create_remote(
self,
*,
title: str,
artist_id: uuid.UUID,
year: int | None,
musicbrainz_id: str | None,
source: str,
source_id: str,
) -> Album:
"""Resolve/create an album bound to a remote ``(source, source_id)``
(lazy materialization save-to-library)."""
...
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: ...
async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ...
async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ...
+11 -1
View File
@@ -2,7 +2,7 @@
import uuid
from sqlalchemy import ForeignKey, Integer, String
from sqlalchemy import ForeignKey, Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base
@@ -11,6 +11,12 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi
class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "albums"
__table_args__ = (
# Binds a remote (browsable) album to its local row for re-browse/save
# dedup. Multiple NULLs are allowed by Postgres, so locally-created
# albums (source/source_id both NULL) never collide on this.
UniqueConstraint("source", "source_id", name="uq_albums_source_source_id"),
)
title: Mapped[str] = mapped_column(String(1024), index=True, nullable=False)
artist_id: Mapped[uuid.UUID] = mapped_column(
@@ -21,3 +27,7 @@ class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
year: Mapped[int | None] = mapped_column(Integer, nullable=True)
cover_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
# -- remote identity (lazy materialization) --------------------------
source: Mapped[str | None] = mapped_column(String(32), nullable=True)
source_id: Mapped[str | None] = mapped_column(String(512), nullable=True)
+11 -1
View File
@@ -1,6 +1,6 @@
"""ORM model for artists."""
from sqlalchemy import String
from sqlalchemy import String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base
@@ -9,6 +9,16 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi
class ArtistModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "artists"
__table_args__ = (
# Binds a remote (browsable) artist to its local row for re-browse/save
# dedup. Multiple NULLs are allowed by Postgres, so locally-created
# artists (source/source_id both NULL) never collide on this.
UniqueConstraint("source", "source_id", name="uq_artists_source_source_id"),
)
name: Mapped[str] = mapped_column(String(512), index=True, nullable=False)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
# -- remote identity (lazy materialization) --------------------------
source: Mapped[str | None] = mapped_column(String(32), nullable=True)
source_id: Mapped[str | None] = mapped_column(String(512), nullable=True)
+9
View File
@@ -64,3 +64,12 @@ class LyricsStatus(enum.StrEnum):
FOUND = "found"
NOT_FOUND = "not_found"
PENDING = "pending"
class TrackAvailability(enum.StrEnum):
"""Whether a track's audio is on local storage or still a remote placeholder
(plan: lazy materialization). ``remote`` tracks have ``storage_uri = NULL``
until ``TrackRepository.materialize`` fills it in."""
LOCAL = "local"
REMOTE = "remote"
+13 -4
View File
@@ -13,7 +13,7 @@ from sqlalchemy import DateTime, ForeignKey, Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base
from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy
from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy, TrackAvailability
from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMixin
@@ -41,11 +41,20 @@ class TrackModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
year: Mapped[int | None] = mapped_column(Integer, nullable=True)
# -- file (original, stored as-is) -----------------------------------
storage_uri: Mapped[str] = mapped_column(String(2048), nullable=False)
file_format: Mapped[str] = mapped_column(String(32), nullable=False)
file_size: Mapped[int] = mapped_column(Integer, nullable=False)
# NULL on a remote placeholder (not yet materialized) — see ``availability``.
storage_uri: Mapped[str | None] = mapped_column(String(2048), nullable=True)
file_format: Mapped[str | None] = mapped_column(String(32), nullable=True)
file_size: Mapped[int | None] = mapped_column(Integer, nullable=True)
bitrate: Mapped[int | None] = mapped_column(Integer, nullable=True)
# ``remote`` = placeholder with no local audio yet; materialize() flips this
# to ``local`` once the file is downloaded and ``storage_uri`` is filled in.
availability: Mapped[str] = mapped_column(
String(16),
nullable=False,
default=TrackAvailability.LOCAL.value,
)
# -- dedup / external ids --------------------------------------------
acoustid_fingerprint: Mapped[str | None] = mapped_column(String(64), index=True, nullable=True)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
@@ -18,6 +18,8 @@ def _to_entity(row: AlbumModel) -> Album:
year=row.year,
cover_path=row.cover_path,
musicbrainz_id=row.musicbrainz_id,
source=row.source,
source_id=row.source_id,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -63,6 +65,58 @@ class SqlAlchemyAlbumRepository:
await self._session.refresh(row)
return _to_entity(row)
async def get_or_create_remote(
self,
*,
title: str,
artist_id: uuid.UUID,
year: int | None,
musicbrainz_id: str | None,
source: str,
source_id: str,
) -> Album:
"""Resolve an album by ``(source, source_id)`` first (re-browse/save
dedup), falling back to ``(title, artist_id)`` and gap-filling the
remote ids onto an existing row, else creating a new remote-bound row."""
row = (
await self._session.execute(
select(AlbumModel).where(
AlbumModel.source == source,
AlbumModel.source_id == source_id,
)
)
).scalar_one_or_none()
if row is None:
row = (
await self._session.execute(
select(AlbumModel).where(
AlbumModel.title == title,
AlbumModel.artist_id == artist_id,
)
)
).scalar_one_or_none()
if row is None:
row = AlbumModel(
title=title,
artist_id=artist_id,
year=year,
musicbrainz_id=musicbrainz_id,
source=source,
source_id=source_id,
)
self._session.add(row)
else:
if row.year is None and year is not None:
row.year = year
if row.musicbrainz_id is None and musicbrainz_id is not None:
row.musicbrainz_id = musicbrainz_id
if row.source is None and row.source_id is None:
row.source = source
row.source_id = source_id
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None:
row = await self._session.get(AlbumModel, album_id)
if row is not None:
@@ -15,6 +15,8 @@ def _to_entity(row: ArtistModel) -> Artist:
return Artist(
id=row.id,
name=row.name,
source=row.source,
source_id=row.source_id,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -35,6 +37,32 @@ class SqlAlchemyArtistRepository:
await self._session.refresh(row)
return _to_entity(row)
async def get_or_create_remote(self, *, name: str, source: str, source_id: str) -> Artist:
"""Resolve an artist by ``(source, source_id)`` first (re-browse/save
dedup), falling back to ``name`` and gap-filling the remote ids onto an
existing row, else creating a new remote-bound row."""
row = (
await self._session.execute(
select(ArtistModel).where(
ArtistModel.source == source,
ArtistModel.source_id == source_id,
)
)
).scalar_one_or_none()
if row is None:
row = (
await self._session.execute(select(ArtistModel).where(ArtistModel.name == name))
).scalar_one_or_none()
if row is None:
row = ArtistModel(name=name, source=source, source_id=source_id)
self._session.add(row)
elif row.source is None and row.source_id is None:
row.source = source
row.source_id = source_id
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None:
row = await self._session.get(ArtistModel, artist_id)
return _to_entity(row) if row is not None else None
@@ -42,6 +42,7 @@ def _track_to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -41,6 +41,7 @@ def _track_to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -10,6 +10,7 @@ from app.domain.entities.storage import FormatBreakdown, LibraryStats
from app.domain.entities.track import Track
from app.domain.errors import NotFoundError
from app.infrastructure.db.models.artist import ArtistModel
from app.infrastructure.db.models.enums import TrackAvailability
from app.infrastructure.db.models.track import TrackModel
@@ -31,6 +32,7 @@ def _to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -61,13 +63,14 @@ class SqlAlchemyTrackRepository:
id: uuid.UUID,
title: str,
artist_id: uuid.UUID,
storage_uri: str,
file_format: str,
file_size: int,
storage_uri: str | None,
file_format: str | None,
file_size: int | None,
source: str,
source_id: str,
metadata_status: str,
added_by: uuid.UUID | None,
availability: str = TrackAvailability.LOCAL.value,
) -> Track:
row = TrackModel(
id=id,
@@ -80,12 +83,38 @@ class SqlAlchemyTrackRepository:
source_id=source_id,
metadata_status=metadata_status,
added_by=added_by,
availability=availability,
)
self._session.add(row)
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def materialize(
self,
track_id: uuid.UUID,
*,
storage_uri: str,
file_format: str,
file_size: int,
bitrate: int | None,
) -> Track:
"""Fill in a remote placeholder's audio fields after a download (lazy
materialization). ``track.id`` is unchanged, so likes/playlists/queue
entries that already reference it keep working."""
row = await self._session.get(TrackModel, track_id)
if row is None:
raise NotFoundError(f"Track {track_id} not found.")
row.storage_uri = storage_uri
row.file_format = file_format
row.file_size = file_size
if bitrate is not None:
row.bitrate = bitrate
row.availability = TrackAvailability.LOCAL.value
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def delete(self, track_id: uuid.UUID) -> None:
row = await self._session.get(TrackModel, track_id)
if row is not None:
@@ -130,6 +159,7 @@ class SqlAlchemyTrackRepository:
func.count(TrackModel.id),
func.coalesce(func.sum(TrackModel.file_size), 0),
)
.where(TrackModel.file_format.is_not(None))
.group_by(TrackModel.file_format)
.order_by(func.sum(TrackModel.file_size).desc())
)