feat(library): lazy materialization foundation for remote tracks (§Phase1)
Docker Build & Publish / build (push) Successful in 1m10s
Docker Build & Publish / push (push) Failing after 7s
Docker Build & Publish / Prune old image versions (push) Has been skipped

Adds nullable storage fields + an availability column on tracks, a remote
source/source_id identity on albums/artists, TrackRepository.materialize(),
and get_or_create_remote() on the album and artist repositories — groundwork
for an on-demand YTM library (placeholders are saved without audio and
materialized in place on first play).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 17:51:43 +03:00
parent 78007461e1
commit 58b98ab5ed
24 changed files with 492 additions and 31 deletions
+11 -1
View File
@@ -2,7 +2,7 @@
import uuid
from sqlalchemy import ForeignKey, Integer, String
from sqlalchemy import ForeignKey, Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base
@@ -11,6 +11,12 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi
class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "albums"
__table_args__ = (
# Binds a remote (browsable) album to its local row for re-browse/save
# dedup. Multiple NULLs are allowed by Postgres, so locally-created
# albums (source/source_id both NULL) never collide on this.
UniqueConstraint("source", "source_id", name="uq_albums_source_source_id"),
)
title: Mapped[str] = mapped_column(String(1024), index=True, nullable=False)
artist_id: Mapped[uuid.UUID] = mapped_column(
@@ -21,3 +27,7 @@ class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
year: Mapped[int | None] = mapped_column(Integer, nullable=True)
cover_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
# -- remote identity (lazy materialization) --------------------------
source: Mapped[str | None] = mapped_column(String(32), nullable=True)
source_id: Mapped[str | None] = mapped_column(String(512), nullable=True)
+11 -1
View File
@@ -1,6 +1,6 @@
"""ORM model for artists."""
from sqlalchemy import String
from sqlalchemy import String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base
@@ -9,6 +9,16 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi
class ArtistModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "artists"
__table_args__ = (
# Binds a remote (browsable) artist to its local row for re-browse/save
# dedup. Multiple NULLs are allowed by Postgres, so locally-created
# artists (source/source_id both NULL) never collide on this.
UniqueConstraint("source", "source_id", name="uq_artists_source_source_id"),
)
name: Mapped[str] = mapped_column(String(512), index=True, nullable=False)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
# -- remote identity (lazy materialization) --------------------------
source: Mapped[str | None] = mapped_column(String(32), nullable=True)
source_id: Mapped[str | None] = mapped_column(String(512), nullable=True)
+9
View File
@@ -64,3 +64,12 @@ class LyricsStatus(enum.StrEnum):
FOUND = "found"
NOT_FOUND = "not_found"
PENDING = "pending"
class TrackAvailability(enum.StrEnum):
"""Whether a track's audio is on local storage or still a remote placeholder
(plan: lazy materialization). ``remote`` tracks have ``storage_uri = NULL``
until ``TrackRepository.materialize`` fills it in."""
LOCAL = "local"
REMOTE = "remote"
+13 -4
View File
@@ -13,7 +13,7 @@ from sqlalchemy import DateTime, ForeignKey, Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base
from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy
from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy, TrackAvailability
from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMixin
@@ -41,11 +41,20 @@ class TrackModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
year: Mapped[int | None] = mapped_column(Integer, nullable=True)
# -- file (original, stored as-is) -----------------------------------
storage_uri: Mapped[str] = mapped_column(String(2048), nullable=False)
file_format: Mapped[str] = mapped_column(String(32), nullable=False)
file_size: Mapped[int] = mapped_column(Integer, nullable=False)
# NULL on a remote placeholder (not yet materialized) — see ``availability``.
storage_uri: Mapped[str | None] = mapped_column(String(2048), nullable=True)
file_format: Mapped[str | None] = mapped_column(String(32), nullable=True)
file_size: Mapped[int | None] = mapped_column(Integer, nullable=True)
bitrate: Mapped[int | None] = mapped_column(Integer, nullable=True)
# ``remote`` = placeholder with no local audio yet; materialize() flips this
# to ``local`` once the file is downloaded and ``storage_uri`` is filled in.
availability: Mapped[str] = mapped_column(
String(16),
nullable=False,
default=TrackAvailability.LOCAL.value,
)
# -- dedup / external ids --------------------------------------------
acoustid_fingerprint: Mapped[str | None] = mapped_column(String(64), index=True, nullable=True)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
@@ -18,6 +18,8 @@ def _to_entity(row: AlbumModel) -> Album:
year=row.year,
cover_path=row.cover_path,
musicbrainz_id=row.musicbrainz_id,
source=row.source,
source_id=row.source_id,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -63,6 +65,58 @@ class SqlAlchemyAlbumRepository:
await self._session.refresh(row)
return _to_entity(row)
async def get_or_create_remote(
self,
*,
title: str,
artist_id: uuid.UUID,
year: int | None,
musicbrainz_id: str | None,
source: str,
source_id: str,
) -> Album:
"""Return the album for a remote ``(source, source_id)``, creating it if needed.

Lookup order:

1. exact match on ``(source, source_id)`` (re-browse/save dedup);
2. fallback match on ``(title, artist_id)``;
3. otherwise a new row is added that carries every given field,
including the remote ids.

Values already stored on an existing row are never overwritten. At most,
``year`` and ``musicbrainz_id`` are filled in when the stored value is
NULL and a value was supplied, and the remote ids are filled in when both
``source`` and ``source_id`` are still NULL.

The session is flushed (not committed here) and the row refreshed before
it is converted to an ``Album`` entity.
"""
# 1) Exact remote identity. The albums table declares a unique constraint
# on (source, source_id), so at most one row can match.
row = (
await self._session.execute(
select(AlbumModel).where(
AlbumModel.source == source,
AlbumModel.source_id == source_id,
)
)
).scalar_one_or_none()
if row is None:
# 2) Unknown by remote id: look for a row with the same title and artist.
# NOTE(review): scalar_one_or_none() raises when more than one row
# matches, and no uniqueness on (title, artist_id) is visible here —
# confirm duplicates cannot exist.
row = (
await self._session.execute(
select(AlbumModel).where(
AlbumModel.title == title,
AlbumModel.artist_id == artist_id,
)
)
).scalar_one_or_none()
if row is None:
# 3) Nothing matched: create a row bound to the remote id from the start.
row = AlbumModel(
title=title,
artist_id=artist_id,
year=year,
musicbrainz_id=musicbrainz_id,
source=source,
source_id=source_id,
)
self._session.add(row)
else:
# Existing row: fill gaps only, never replace a stored value.
if row.year is None and year is not None:
row.year = year
if row.musicbrainz_id is None and musicbrainz_id is not None:
row.musicbrainz_id = musicbrainz_id
# Bind the remote ids only when the row has neither of them. A row that
# is already bound to some (source, source_id) is left untouched, so a
# title/artist match against a differently-bound row is returned without
# recording the ids passed in.
# NOTE(review): confirm that collapsing onto the existing row is intended.
if row.source is None and row.source_id is None:
row.source = source
row.source_id = source_id
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None:
row = await self._session.get(AlbumModel, album_id)
if row is not None:
@@ -15,6 +15,8 @@ def _to_entity(row: ArtistModel) -> Artist:
return Artist(
id=row.id,
name=row.name,
source=row.source,
source_id=row.source_id,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -35,6 +37,32 @@ class SqlAlchemyArtistRepository:
await self._session.refresh(row)
return _to_entity(row)
async def get_or_create_remote(self, *, name: str, source: str, source_id: str) -> Artist:
"""Return the artist for a remote ``(source, source_id)``, creating it if needed.

Lookup order:

1. exact match on ``(source, source_id)`` (re-browse/save dedup);
2. fallback match on ``name``;
3. otherwise a new row is added with the name and the remote ids.

An existing row is never overwritten: the remote ids are written onto it
only when both ``source`` and ``source_id`` are still NULL.

The session is flushed (not committed here) and the row refreshed before
it is converted to an ``Artist`` entity.
"""
# 1) Exact remote identity. The artists table declares a unique constraint
# on (source, source_id), so at most one row can match.
row = (
await self._session.execute(
select(ArtistModel).where(
ArtistModel.source == source,
ArtistModel.source_id == source_id,
)
)
).scalar_one_or_none()
if row is None:
# 2) Unknown by remote id: fall back to an exact name match.
# NOTE(review): scalar_one_or_none() raises when more than one row
# matches, and no uniqueness on ``name`` is visible here — confirm that
# same-named artists cannot coexist.
row = (
await self._session.execute(select(ArtistModel).where(ArtistModel.name == name))
).scalar_one_or_none()
if row is None:
# 3) Nothing matched: create a row bound to the remote id from the start.
row = ArtistModel(name=name, source=source, source_id=source_id)
self._session.add(row)
# Existing row with no remote ids at all: bind it to this remote identity.
# A row already bound to some (source, source_id) is left untouched, so a
# name match against a differently-bound artist is returned without
# recording the ids passed in.
# NOTE(review): confirm that collapsing same-named remote artists is intended.
elif row.source is None and row.source_id is None:
row.source = source
row.source_id = source_id
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None:
    """Load one artist by primary key.

    Returns the ``Artist`` entity, or ``None`` when the session finds no row
    for ``artist_id``.
    """
    found = await self._session.get(ArtistModel, artist_id)
    if found is None:
        return None
    return _to_entity(found)
@@ -42,6 +42,7 @@ def _track_to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -41,6 +41,7 @@ def _track_to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -10,6 +10,7 @@ from app.domain.entities.storage import FormatBreakdown, LibraryStats
from app.domain.entities.track import Track
from app.domain.errors import NotFoundError
from app.infrastructure.db.models.artist import ArtistModel
from app.infrastructure.db.models.enums import TrackAvailability
from app.infrastructure.db.models.track import TrackModel
@@ -31,6 +32,7 @@ def _to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status,
metadata_error=row.metadata_error,
enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at,
updated_at=row.updated_at,
)
@@ -61,13 +63,14 @@ class SqlAlchemyTrackRepository:
id: uuid.UUID,
title: str,
artist_id: uuid.UUID,
storage_uri: str,
file_format: str,
file_size: int,
storage_uri: str | None,
file_format: str | None,
file_size: int | None,
source: str,
source_id: str,
metadata_status: str,
added_by: uuid.UUID | None,
availability: str = TrackAvailability.LOCAL.value,
) -> Track:
row = TrackModel(
id=id,
@@ -80,12 +83,38 @@ class SqlAlchemyTrackRepository:
source_id=source_id,
metadata_status=metadata_status,
added_by=added_by,
availability=availability,
)
self._session.add(row)
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def materialize(
    self,
    track_id: uuid.UUID,
    *,
    storage_uri: str,
    file_format: str,
    file_size: int,
    bitrate: int | None,
) -> Track:
    """Turn a remote placeholder into a local track after its audio is downloaded.

    The audio fields are written onto the existing row and its availability
    is set to ``local`` (lazy materialization). The row is updated in place
    and its id is never reassigned, so likes/playlists/queue entries that
    already reference the track keep working.

    ``bitrate`` is written only when a value is supplied; passing ``None``
    keeps whatever the row already stores.

    Returns the refreshed ``Track`` entity. Raises ``NotFoundError`` when no
    row exists for ``track_id``.
    """
    session = self._session
    placeholder = await session.get(TrackModel, track_id)
    if placeholder is None:
        raise NotFoundError(f"Track {track_id} not found.")

    placeholder.storage_uri = storage_uri
    placeholder.file_format = file_format
    placeholder.file_size = file_size
    if bitrate is not None:
        placeholder.bitrate = bitrate
    # The audio now exists locally, so the row stops being a placeholder.
    placeholder.availability = TrackAvailability.LOCAL.value

    # Flush the pending changes, then reload the row before mapping it.
    await session.flush()
    await session.refresh(placeholder)
    return _to_entity(placeholder)
async def delete(self, track_id: uuid.UUID) -> None:
row = await self._session.get(TrackModel, track_id)
if row is not None:
@@ -130,6 +159,7 @@ class SqlAlchemyTrackRepository:
func.count(TrackModel.id),
func.coalesce(func.sum(TrackModel.file_size), 0),
)
.where(TrackModel.file_format.is_not(None))
.group_by(TrackModel.file_format)
.order_by(func.sum(TrackModel.file_size).desc())
)