feat(library): lazy materialization foundation for remote tracks (§Phase1)
Docker Build & Publish / build (push) Successful in 1m10s
Docker Build & Publish / push (push) Failing after 7s
Docker Build & Publish / Prune old image versions (push) Has been skipped

Adds nullable storage fields + availability column on tracks, remote
source/source_id identity on albums/artists, TrackRepository.materialize()
and get_or_create_remote() repos — groundwork for on-demand YTM library
(placeholders saved without audio, materialized in-place on first play).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Senko-san
2026-06-14 17:51:43 +03:00
parent 78007461e1
commit 58b98ab5ed
24 changed files with 492 additions and 31 deletions
@@ -0,0 +1,65 @@
"""remote placeholders: track availability, album/artist remote ids
Revision ID: dc126696f5a6
Revises: 20260614_dl_track_id
Create Date: 2026-06-14 11:25:30.643588
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = 'dc126696f5a6'
down_revision: str | None = '20260614_dl_track_id'
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('albums', sa.Column('source', sa.String(length=32), nullable=True))
op.add_column('albums', sa.Column('source_id', sa.String(length=512), nullable=True))
op.create_unique_constraint('uq_albums_source_source_id', 'albums', ['source', 'source_id'])
op.add_column('artists', sa.Column('source', sa.String(length=32), nullable=True))
op.add_column('artists', sa.Column('source_id', sa.String(length=512), nullable=True))
op.create_unique_constraint('uq_artists_source_source_id', 'artists', ['source', 'source_id'])
op.add_column(
'tracks',
sa.Column('availability', sa.String(length=16), nullable=False, server_default='local'),
)
op.alter_column('tracks', 'availability', server_default=None)
op.alter_column('tracks', 'storage_uri',
existing_type=sa.VARCHAR(length=2048),
nullable=True)
op.alter_column('tracks', 'file_format',
existing_type=sa.VARCHAR(length=32),
nullable=True)
op.alter_column('tracks', 'file_size',
existing_type=sa.INTEGER(),
nullable=True)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.alter_column('tracks', 'file_size',
existing_type=sa.INTEGER(),
nullable=False)
op.alter_column('tracks', 'file_format',
existing_type=sa.VARCHAR(length=32),
nullable=False)
op.alter_column('tracks', 'storage_uri',
existing_type=sa.VARCHAR(length=2048),
nullable=False)
op.drop_column('tracks', 'availability')
op.drop_constraint('uq_artists_source_source_id', 'artists', type_='unique')
op.drop_column('artists', 'source_id')
op.drop_column('artists', 'source')
op.drop_constraint('uq_albums_source_source_id', 'albums', type_='unique')
op.drop_column('albums', 'source_id')
op.drop_column('albums', 'source')
# ### end Alembic commands ###
+1 -1
View File
@@ -65,7 +65,7 @@ async def download(
if track is None: if track is None:
raise NotFoundError("Song not found.") raise NotFoundError("Song not found.")
result = await service.open_stream(track_id, None) result = await service.open_stream(track_id, None)
filename = f"{track.title}.{track.file_format}" filename = f"{track.title}.{track.file_format or 'bin'}"
headers = { headers = {
"Content-Length": str(result.content_length), "Content-Length": str(result.content_length),
"Content-Disposition": f'attachment; filename="{filename}"', "Content-Disposition": f'attachment; filename="{filename}"',
+2 -2
View File
@@ -80,8 +80,8 @@ def song_dict(
"albumId": encode_album(track.album_id) if track.album_id is not None else None, "albumId": encode_album(track.album_id) if track.album_id is not None else None,
"artistId": encode_artist(track.artist_id), "artistId": encode_artist(track.artist_id),
"coverArt": cover, "coverArt": cover,
"size": track.file_size, "size": track.file_size or 0,
"contentType": content_type_for(track.file_format), "contentType": content_type_for(track.file_format or ""),
"suffix": track.file_format, "suffix": track.file_format,
"duration": track.duration_seconds, "duration": track.duration_seconds,
"year": track.year, "year": track.year,
+3 -2
View File
@@ -14,14 +14,15 @@ class TrackOut(BaseModel):
album_id: uuid.UUID | None album_id: uuid.UUID | None
album_title: str | None album_title: str | None
duration_seconds: int | None duration_seconds: int | None
file_format: str file_format: str | None
file_size: int file_size: int | None
genre: str | None genre: str | None
year: int | None year: int | None
track_number: int | None track_number: int | None
metadata_status: str metadata_status: str
metadata_error: str | None metadata_error: str | None
enriched_at: dt.datetime | None enriched_at: dt.datetime | None
availability: str
source: str source: str
has_cover: bool has_cover: bool
created_at: dt.datetime created_at: dt.datetime
+2
View File
@@ -54,6 +54,7 @@ async def _build_track_out(
metadata_status=t.metadata_status, metadata_status=t.metadata_status,
metadata_error=t.metadata_error, metadata_error=t.metadata_error,
enriched_at=t.enriched_at, enriched_at=t.enriched_at,
availability=t.availability,
source=t.source, source=t.source,
has_cover=bool(t.album_id and albums.get(t.album_id) and albums[t.album_id].cover_path), has_cover=bool(t.album_id and albums.get(t.album_id) and albums[t.album_id].cover_path),
created_at=t.created_at, created_at=t.created_at,
@@ -155,6 +156,7 @@ async def delete_track(
if track is None: if track is None:
raise NotFoundError(f"Track {track_id} not found.") raise NotFoundError(f"Track {track_id} not found.")
await track_repo.delete(track_id) await track_repo.delete(track_id)
if track.storage_uri is not None:
await storage.delete(track.storage_uri) await storage.delete(track.storage_uri)
return Response(status_code=204) return Response(status_code=204)
+9 -3
View File
@@ -79,9 +79,13 @@ class MetadataEnrichmentService:
if track.metadata_status == "manual": if track.metadata_status == "manual":
log.info("enrich_skip_manual", track_id=str(track_id)) log.info("enrich_skip_manual", track_id=str(track_id))
return EnrichmentResult(track_id=track_id, status="skipped") return EnrichmentResult(track_id=track_id, status="skipped")
storage_uri = track.storage_uri
if storage_uri is None:
log.info("enrich_skip_remote", track_id=str(track_id))
return EnrichmentResult(track_id=track_id, status="skipped")
tags = await self._read_local(track.storage_uri) tags = await self._read_local(storage_uri)
match = await self._identify(track.storage_uri) match = await self._identify(storage_uri)
# Merge order is tag-first by default — embedded tags fix the common # Merge order is tag-first by default — embedded tags fix the common
# well-tagged offline case. But a *high-confidence* AcoustID match is the # well-tagged offline case. But a *high-confidence* AcoustID match is the
@@ -125,7 +129,7 @@ class MetadataEnrichmentService:
if album is not None: if album is not None:
await self._resolve_cover( await self._resolve_cover(
album, album,
storage_uri=track.storage_uri, storage_uri=storage_uri,
release_group_mbid=match.release_group_mbid if match else None, release_group_mbid=match.release_group_mbid if match else None,
) )
@@ -175,6 +179,8 @@ class MetadataEnrichmentService:
return [] return []
if not self._acoustid.is_available() or not self._fingerprinter.is_available(): if not self._acoustid.is_available() or not self._fingerprinter.is_available():
return [] return []
if track.storage_uri is None:
return []
try: try:
async with self._storage.as_local_path(track.storage_uri) as path: async with self._storage.as_local_path(track.storage_uri) as path:
fingerprint = await self._fingerprinter.calculate(path) fingerprint = await self._fingerprinter.calculate(path)
+6 -3
View File
@@ -72,16 +72,19 @@ class StreamingService:
track = await self._tracks.get_by_id(track_id) track = await self._tracks.get_by_id(track_id)
if track is None: if track is None:
raise NotFoundError("Track not found.") raise NotFoundError("Track not found.")
storage_uri = track.storage_uri
if storage_uri is None:
raise NotFoundError("Track is not yet downloaded.")
stat = await self._storage.stat(track.storage_uri) stat = await self._storage.stat(storage_uri)
total_size = stat.size total_size = stat.size
content_type = stat.content_type or _FORMAT_CONTENT_TYPE.get( content_type = stat.content_type or _FORMAT_CONTENT_TYPE.get(
track.file_format.lower(), "application/octet-stream" (track.file_format or "").lower(), "application/octet-stream"
) )
start, end, is_partial = _parse_range(range_header, total_size) start, end, is_partial = _parse_range(range_header, total_size)
stream, _ = await self._storage.open_range(track.storage_uri, start, end) stream, _ = await self._storage.open_range(storage_uri, start, end)
actual_end = end if end is not None else total_size - 1 actual_end = end if end is not None else total_size - 1
content_length = actual_end - start + 1 content_length = actual_end - start + 1
+2
View File
@@ -13,5 +13,7 @@ class Album:
year: int | None year: int | None
cover_path: str | None cover_path: str | None
musicbrainz_id: str | None musicbrainz_id: str | None
source: str | None
source_id: str | None
created_at: dt.datetime created_at: dt.datetime
updated_at: dt.datetime updated_at: dt.datetime
+6 -3
View File
@@ -9,6 +9,8 @@ from dataclasses import dataclass
class Artist: class Artist:
id: uuid.UUID id: uuid.UUID
name: str name: str
source: str | None
source_id: str | None
created_at: dt.datetime created_at: dt.datetime
updated_at: dt.datetime updated_at: dt.datetime
@@ -19,9 +21,9 @@ class Track:
title: str title: str
artist_id: uuid.UUID artist_id: uuid.UUID
album_id: uuid.UUID | None album_id: uuid.UUID | None
storage_uri: str storage_uri: str | None
file_format: str file_format: str | None
file_size: int file_size: int | None
source: str source: str
source_id: str source_id: str
duration_seconds: int | None duration_seconds: int | None
@@ -31,5 +33,6 @@ class Track:
metadata_status: str metadata_status: str
metadata_error: str | None metadata_error: str | None
enriched_at: dt.datetime | None enriched_at: dt.datetime | None
availability: str
created_at: dt.datetime created_at: dt.datetime
updated_at: dt.datetime updated_at: dt.datetime
+36 -3
View File
@@ -114,6 +114,11 @@ class FileStorage(Protocol):
class ArtistRepository(Protocol): class ArtistRepository(Protocol):
async def get_or_create(self, name: str) -> Artist: ... async def get_or_create(self, name: str) -> Artist: ...
async def get_or_create_remote(self, *, name: str, source: str, source_id: str) -> Artist:
"""Resolve/create an artist bound to a remote ``(source, source_id)``
(lazy materialization save-to-library)."""
...
async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None: ... async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None: ...
async def get_many(self, ids: list[uuid.UUID]) -> list[Artist]: ... async def get_many(self, ids: list[uuid.UUID]) -> list[Artist]: ...
async def list(self, *, q: str | None, limit: int, offset: int) -> list[Artist]: ... async def list(self, *, q: str | None, limit: int, offset: int) -> list[Artist]: ...
@@ -131,14 +136,28 @@ class TrackRepository(Protocol):
id: uuid.UUID, id: uuid.UUID,
title: str, title: str,
artist_id: uuid.UUID, artist_id: uuid.UUID,
storage_uri: str, storage_uri: str | None,
file_format: str, file_format: str | None,
file_size: int, file_size: int | None,
source: str, source: str,
source_id: str, source_id: str,
metadata_status: str, metadata_status: str,
added_by: uuid.UUID | None, added_by: uuid.UUID | None,
availability: str = ...,
) -> Track: ... ) -> Track: ...
async def materialize(
self,
track_id: uuid.UUID,
*,
storage_uri: str,
file_format: str,
file_size: int,
bitrate: int | None,
) -> Track:
"""Fill in a remote placeholder's audio fields after a download
(lazy materialization), flipping ``availability`` to ``local``."""
...
async def delete(self, track_id: uuid.UUID) -> None: ... async def delete(self, track_id: uuid.UUID) -> None: ...
# genres / library_stats must come before ``list`` — the method named # genres / library_stats must come before ``list`` — the method named
# ``list`` shadows the builtin in later annotations (same pattern as # ``list`` shadows the builtin in later annotations (same pattern as
@@ -212,6 +231,20 @@ class AlbumRepository(Protocol):
year: int | None, year: int | None,
musicbrainz_id: str | None, musicbrainz_id: str | None,
) -> Album: ... ) -> Album: ...
async def get_or_create_remote(
self,
*,
title: str,
artist_id: uuid.UUID,
year: int | None,
musicbrainz_id: str | None,
source: str,
source_id: str,
) -> Album:
"""Resolve/create an album bound to a remote ``(source, source_id)``
(lazy materialization save-to-library)."""
...
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: ... async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: ...
async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ... async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ...
async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ... async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ...
+11 -1
View File
@@ -2,7 +2,7 @@
import uuid import uuid
from sqlalchemy import ForeignKey, Integer, String from sqlalchemy import ForeignKey, Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base from app.infrastructure.db.base import Base
@@ -11,6 +11,12 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi
class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base): class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "albums" __tablename__ = "albums"
__table_args__ = (
# Binds a remote (browsable) album to its local row for re-browse/save
# dedup. Multiple NULLs are allowed by Postgres, so locally-created
# albums (source/source_id both NULL) never collide on this.
UniqueConstraint("source", "source_id", name="uq_albums_source_source_id"),
)
title: Mapped[str] = mapped_column(String(1024), index=True, nullable=False) title: Mapped[str] = mapped_column(String(1024), index=True, nullable=False)
artist_id: Mapped[uuid.UUID] = mapped_column( artist_id: Mapped[uuid.UUID] = mapped_column(
@@ -21,3 +27,7 @@ class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
year: Mapped[int | None] = mapped_column(Integer, nullable=True) year: Mapped[int | None] = mapped_column(Integer, nullable=True)
cover_path: Mapped[str | None] = mapped_column(String(1024), nullable=True) cover_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True) musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
# -- remote identity (lazy materialization) --------------------------
source: Mapped[str | None] = mapped_column(String(32), nullable=True)
source_id: Mapped[str | None] = mapped_column(String(512), nullable=True)
+11 -1
View File
@@ -1,6 +1,6 @@
"""ORM model for artists.""" """ORM model for artists."""
from sqlalchemy import String from sqlalchemy import String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base from app.infrastructure.db.base import Base
@@ -9,6 +9,16 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi
class ArtistModel(UUIDPrimaryKeyMixin, TimestampMixin, Base): class ArtistModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "artists" __tablename__ = "artists"
__table_args__ = (
# Binds a remote (browsable) artist to its local row for re-browse/save
# dedup. Multiple NULLs are allowed by Postgres, so locally-created
# artists (source/source_id both NULL) never collide on this.
UniqueConstraint("source", "source_id", name="uq_artists_source_source_id"),
)
name: Mapped[str] = mapped_column(String(512), index=True, nullable=False) name: Mapped[str] = mapped_column(String(512), index=True, nullable=False)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True) musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
# -- remote identity (lazy materialization) --------------------------
source: Mapped[str | None] = mapped_column(String(32), nullable=True)
source_id: Mapped[str | None] = mapped_column(String(512), nullable=True)
+9
View File
@@ -64,3 +64,12 @@ class LyricsStatus(enum.StrEnum):
FOUND = "found" FOUND = "found"
NOT_FOUND = "not_found" NOT_FOUND = "not_found"
PENDING = "pending" PENDING = "pending"
class TrackAvailability(enum.StrEnum):
"""Whether a track's audio is on local storage or still a remote placeholder
(plan: lazy materialization). ``remote`` tracks have ``storage_uri = NULL``
until ``TrackRepository.materialize`` fills it in."""
LOCAL = "local"
REMOTE = "remote"
+13 -4
View File
@@ -13,7 +13,7 @@ from sqlalchemy import DateTime, ForeignKey, Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from app.infrastructure.db.base import Base from app.infrastructure.db.base import Base
from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy, TrackAvailability
from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMixin from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMixin
@@ -41,11 +41,20 @@ class TrackModel(UUIDPrimaryKeyMixin, TimestampMixin, Base):
year: Mapped[int | None] = mapped_column(Integer, nullable=True) year: Mapped[int | None] = mapped_column(Integer, nullable=True)
# -- file (original, stored as-is) ----------------------------------- # -- file (original, stored as-is) -----------------------------------
storage_uri: Mapped[str] = mapped_column(String(2048), nullable=False) # NULL on a remote placeholder (not yet materialized) — see ``availability``.
file_format: Mapped[str] = mapped_column(String(32), nullable=False) storage_uri: Mapped[str | None] = mapped_column(String(2048), nullable=True)
file_size: Mapped[int] = mapped_column(Integer, nullable=False) file_format: Mapped[str | None] = mapped_column(String(32), nullable=True)
file_size: Mapped[int | None] = mapped_column(Integer, nullable=True)
bitrate: Mapped[int | None] = mapped_column(Integer, nullable=True) bitrate: Mapped[int | None] = mapped_column(Integer, nullable=True)
# ``remote`` = placeholder with no local audio yet; materialize() flips this
# to ``local`` once the file is downloaded and ``storage_uri`` is filled in.
availability: Mapped[str] = mapped_column(
String(16),
nullable=False,
default=TrackAvailability.LOCAL.value,
)
# -- dedup / external ids -------------------------------------------- # -- dedup / external ids --------------------------------------------
acoustid_fingerprint: Mapped[str | None] = mapped_column(String(64), index=True, nullable=True) acoustid_fingerprint: Mapped[str | None] = mapped_column(String(64), index=True, nullable=True)
musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True) musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True)
@@ -18,6 +18,8 @@ def _to_entity(row: AlbumModel) -> Album:
year=row.year, year=row.year,
cover_path=row.cover_path, cover_path=row.cover_path,
musicbrainz_id=row.musicbrainz_id, musicbrainz_id=row.musicbrainz_id,
source=row.source,
source_id=row.source_id,
created_at=row.created_at, created_at=row.created_at,
updated_at=row.updated_at, updated_at=row.updated_at,
) )
@@ -63,6 +65,58 @@ class SqlAlchemyAlbumRepository:
await self._session.refresh(row) await self._session.refresh(row)
return _to_entity(row) return _to_entity(row)
async def get_or_create_remote(
self,
*,
title: str,
artist_id: uuid.UUID,
year: int | None,
musicbrainz_id: str | None,
source: str,
source_id: str,
) -> Album:
"""Resolve an album by ``(source, source_id)`` first (re-browse/save
dedup), falling back to ``(title, artist_id)`` and gap-filling the
remote ids onto an existing row, else creating a new remote-bound row."""
row = (
await self._session.execute(
select(AlbumModel).where(
AlbumModel.source == source,
AlbumModel.source_id == source_id,
)
)
).scalar_one_or_none()
if row is None:
row = (
await self._session.execute(
select(AlbumModel).where(
AlbumModel.title == title,
AlbumModel.artist_id == artist_id,
)
)
).scalar_one_or_none()
if row is None:
row = AlbumModel(
title=title,
artist_id=artist_id,
year=year,
musicbrainz_id=musicbrainz_id,
source=source,
source_id=source_id,
)
self._session.add(row)
else:
if row.year is None and year is not None:
row.year = year
if row.musicbrainz_id is None and musicbrainz_id is not None:
row.musicbrainz_id = musicbrainz_id
if row.source is None and row.source_id is None:
row.source = source
row.source_id = source_id
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None:
row = await self._session.get(AlbumModel, album_id) row = await self._session.get(AlbumModel, album_id)
if row is not None: if row is not None:
@@ -15,6 +15,8 @@ def _to_entity(row: ArtistModel) -> Artist:
return Artist( return Artist(
id=row.id, id=row.id,
name=row.name, name=row.name,
source=row.source,
source_id=row.source_id,
created_at=row.created_at, created_at=row.created_at,
updated_at=row.updated_at, updated_at=row.updated_at,
) )
@@ -35,6 +37,32 @@ class SqlAlchemyArtistRepository:
await self._session.refresh(row) await self._session.refresh(row)
return _to_entity(row) return _to_entity(row)
async def get_or_create_remote(self, *, name: str, source: str, source_id: str) -> Artist:
"""Resolve an artist by ``(source, source_id)`` first (re-browse/save
dedup), falling back to ``name`` and gap-filling the remote ids onto an
existing row, else creating a new remote-bound row."""
row = (
await self._session.execute(
select(ArtistModel).where(
ArtistModel.source == source,
ArtistModel.source_id == source_id,
)
)
).scalar_one_or_none()
if row is None:
row = (
await self._session.execute(select(ArtistModel).where(ArtistModel.name == name))
).scalar_one_or_none()
if row is None:
row = ArtistModel(name=name, source=source, source_id=source_id)
self._session.add(row)
elif row.source is None and row.source_id is None:
row.source = source
row.source_id = source_id
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None: async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None:
row = await self._session.get(ArtistModel, artist_id) row = await self._session.get(ArtistModel, artist_id)
return _to_entity(row) if row is not None else None return _to_entity(row) if row is not None else None
@@ -42,6 +42,7 @@ def _track_to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status, metadata_status=row.metadata_status,
metadata_error=row.metadata_error, metadata_error=row.metadata_error,
enriched_at=row.enriched_at, enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at, created_at=row.created_at,
updated_at=row.updated_at, updated_at=row.updated_at,
) )
@@ -41,6 +41,7 @@ def _track_to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status, metadata_status=row.metadata_status,
metadata_error=row.metadata_error, metadata_error=row.metadata_error,
enriched_at=row.enriched_at, enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at, created_at=row.created_at,
updated_at=row.updated_at, updated_at=row.updated_at,
) )
@@ -10,6 +10,7 @@ from app.domain.entities.storage import FormatBreakdown, LibraryStats
from app.domain.entities.track import Track from app.domain.entities.track import Track
from app.domain.errors import NotFoundError from app.domain.errors import NotFoundError
from app.infrastructure.db.models.artist import ArtistModel from app.infrastructure.db.models.artist import ArtistModel
from app.infrastructure.db.models.enums import TrackAvailability
from app.infrastructure.db.models.track import TrackModel from app.infrastructure.db.models.track import TrackModel
@@ -31,6 +32,7 @@ def _to_entity(row: TrackModel) -> Track:
metadata_status=row.metadata_status, metadata_status=row.metadata_status,
metadata_error=row.metadata_error, metadata_error=row.metadata_error,
enriched_at=row.enriched_at, enriched_at=row.enriched_at,
availability=row.availability,
created_at=row.created_at, created_at=row.created_at,
updated_at=row.updated_at, updated_at=row.updated_at,
) )
@@ -61,13 +63,14 @@ class SqlAlchemyTrackRepository:
id: uuid.UUID, id: uuid.UUID,
title: str, title: str,
artist_id: uuid.UUID, artist_id: uuid.UUID,
storage_uri: str, storage_uri: str | None,
file_format: str, file_format: str | None,
file_size: int, file_size: int | None,
source: str, source: str,
source_id: str, source_id: str,
metadata_status: str, metadata_status: str,
added_by: uuid.UUID | None, added_by: uuid.UUID | None,
availability: str = TrackAvailability.LOCAL.value,
) -> Track: ) -> Track:
row = TrackModel( row = TrackModel(
id=id, id=id,
@@ -80,12 +83,38 @@ class SqlAlchemyTrackRepository:
source_id=source_id, source_id=source_id,
metadata_status=metadata_status, metadata_status=metadata_status,
added_by=added_by, added_by=added_by,
availability=availability,
) )
self._session.add(row) self._session.add(row)
await self._session.flush() await self._session.flush()
await self._session.refresh(row) await self._session.refresh(row)
return _to_entity(row) return _to_entity(row)
async def materialize(
self,
track_id: uuid.UUID,
*,
storage_uri: str,
file_format: str,
file_size: int,
bitrate: int | None,
) -> Track:
"""Fill in a remote placeholder's audio fields after a download (lazy
materialization). ``track.id`` is unchanged, so likes/playlists/queue
entries that already reference it keep working."""
row = await self._session.get(TrackModel, track_id)
if row is None:
raise NotFoundError(f"Track {track_id} not found.")
row.storage_uri = storage_uri
row.file_format = file_format
row.file_size = file_size
if bitrate is not None:
row.bitrate = bitrate
row.availability = TrackAvailability.LOCAL.value
await self._session.flush()
await self._session.refresh(row)
return _to_entity(row)
async def delete(self, track_id: uuid.UUID) -> None: async def delete(self, track_id: uuid.UUID) -> None:
row = await self._session.get(TrackModel, track_id) row = await self._session.get(TrackModel, track_id)
if row is not None: if row is not None:
@@ -130,6 +159,7 @@ class SqlAlchemyTrackRepository:
func.count(TrackModel.id), func.count(TrackModel.id),
func.coalesce(func.sum(TrackModel.file_size), 0), func.coalesce(func.sum(TrackModel.file_size), 0),
) )
.where(TrackModel.file_format.is_not(None))
.group_by(TrackModel.file_format) .group_by(TrackModel.file_format)
.order_by(func.sum(TrackModel.file_size).desc()) .order_by(func.sum(TrackModel.file_size).desc())
) )
+10 -1
View File
@@ -16,7 +16,14 @@ pytestmark = pytest.mark.asyncio
class FakeArtistRepo: class FakeArtistRepo:
async def get_or_create(self, name: str) -> Artist: async def get_or_create(self, name: str) -> Artist:
now = dt.datetime.now(dt.UTC) now = dt.datetime.now(dt.UTC)
return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) return Artist(
id=uuid.uuid4(),
name=name,
source=None,
source_id=None,
created_at=now,
updated_at=now,
)
class FakeTrackRepo: class FakeTrackRepo:
@@ -46,6 +53,7 @@ class FakeTrackRepo:
metadata_status=str(kw["metadata_status"]), metadata_status=str(kw["metadata_status"]),
metadata_error=None, metadata_error=None,
enriched_at=None, enriched_at=None,
availability="local",
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
@@ -140,6 +148,7 @@ def _track(source: str, source_id: str) -> Track:
metadata_status="pending", metadata_status="pending",
metadata_error=None, metadata_error=None,
enriched_at=None, enriched_at=None,
availability="local",
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
+10 -1
View File
@@ -20,7 +20,14 @@ class FakeArtistRepo:
async def get_or_create(self, name: str) -> Artist: async def get_or_create(self, name: str) -> Artist:
if name not in self._by_name: if name not in self._by_name:
now = dt.datetime.now(dt.UTC) now = dt.datetime.now(dt.UTC)
self._by_name[name] = Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) self._by_name[name] = Artist(
id=uuid.uuid4(),
name=name,
source=None,
source_id=None,
created_at=now,
updated_at=now,
)
return self._by_name[name] return self._by_name[name]
@@ -55,6 +62,7 @@ class FakeTrackRepo:
metadata_status=str(kw["metadata_status"]), metadata_status=str(kw["metadata_status"]),
metadata_error=None, metadata_error=None,
enriched_at=None, enriched_at=None,
availability="local",
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
@@ -139,6 +147,7 @@ async def test_dedup_skips_already_imported() -> None:
metadata_status="pending", metadata_status="pending",
metadata_error=None, metadata_error=None,
enriched_at=None, enriched_at=None,
availability="local",
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
+156
View File
@@ -0,0 +1,156 @@
"""Integration tests for the lazy-materialization foundation:
``TrackRepository.materialize`` and ``Album``/``ArtistRepository.get_or_create_remote``.
Requires a reachable Postgres; skips otherwise (same pattern as
``test_upload_stream_api.py``).
"""
import asyncio
import uuid
from collections.abc import AsyncIterator
import pytest
from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope
from app.infrastructure.db.models.enums import TrackAvailability
from app.infrastructure.db.repositories import (
SqlAlchemyAlbumRepository,
SqlAlchemyArtistRepository,
SqlAlchemyTrackRepository,
)
pytestmark = pytest.mark.asyncio
_db_reachable_cache: bool | None = None
async def _db_reachable() -> bool:
global _db_reachable_cache
if _db_reachable_cache is not None:
return _db_reachable_cache
from sqlalchemy import text
try:
async with asyncio.timeout(3):
async with get_engine().connect() as conn:
await conn.execute(text("SELECT 1"))
_db_reachable_cache = True
except Exception:
_db_reachable_cache = False
return _db_reachable_cache
@pytest.fixture
async def db() -> AsyncIterator[None]:
if not await _db_reachable():
pytest.skip("Postgres not reachable — integration test skipped.")
async with get_engine().begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await conn.run_sync(Base.metadata.create_all)
yield None
async with get_engine().begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await dispose_engine()
async def test_placeholder_track_materializes_in_place(db: None) -> None:
"""A remote placeholder (no storage) gets its audio fields filled in by
``materialize`` without changing ``track.id`` — the stable content id that
likes/playlists/queue may already reference."""
async with session_scope() as session:
artists = SqlAlchemyArtistRepository(session)
tracks = SqlAlchemyTrackRepository(session)
artist = await artists.get_or_create("Some Artist")
track_id = uuid.uuid4()
placeholder = await tracks.add(
id=track_id,
title="Remote Track",
artist_id=artist.id,
storage_uri=None,
file_format=None,
file_size=None,
source="youtube",
source_id="abc123",
metadata_status="pending",
added_by=None,
availability=TrackAvailability.REMOTE.value,
)
assert placeholder.availability == "remote"
assert placeholder.storage_uri is None
materialized = await tracks.materialize(
track_id,
storage_uri="tracks/ab/abc123.m4a",
file_format="m4a",
file_size=12345,
bitrate=160,
)
assert materialized.id == track_id
assert materialized.availability == "local"
assert materialized.storage_uri == "tracks/ab/abc123.m4a"
assert materialized.file_format == "m4a"
assert materialized.file_size == 12345
async def test_artist_get_or_create_remote_dedups_by_remote_id(db: None) -> None:
async with session_scope() as session:
artists = SqlAlchemyArtistRepository(session)
first = await artists.get_or_create_remote(
name="Daft Punk", source="youtube", source_id="UCabc"
)
again = await artists.get_or_create_remote(
name="Daft Punk (different display name)", source="youtube", source_id="UCabc"
)
assert first.id == again.id
assert again.source == "youtube"
assert again.source_id == "UCabc"
async def test_artist_get_or_create_remote_binds_existing_local_artist(db: None) -> None:
async with session_scope() as session:
artists = SqlAlchemyArtistRepository(session)
local = await artists.get_or_create("Pink Floyd")
remote = await artists.get_or_create_remote(
name="Pink Floyd", source="youtube", source_id="UCxyz"
)
assert remote.id == local.id
assert remote.source == "youtube"
assert remote.source_id == "UCxyz"
async def test_album_get_or_create_remote_dedups_by_remote_id(db: None) -> None:
async with session_scope() as session:
artists = SqlAlchemyArtistRepository(session)
albums = SqlAlchemyAlbumRepository(session)
artist = await artists.get_or_create("Daft Punk")
first = await albums.get_or_create_remote(
title="Discovery",
artist_id=artist.id,
year=2001,
musicbrainz_id=None,
source="youtube",
source_id="MPREb_abc",
)
again = await albums.get_or_create_remote(
title="Discovery",
artist_id=artist.id,
year=None,
musicbrainz_id=None,
source="youtube",
source_id="MPREb_abc",
)
assert first.id == again.id
assert again.source == "youtube"
assert again.source_id == "MPREb_abc"
assert again.year == 2001
+11 -1
View File
@@ -42,6 +42,7 @@ def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Trac
metadata_status=metadata_status, metadata_status=metadata_status,
metadata_error=None, metadata_error=None,
enriched_at=None, enriched_at=None,
availability="local",
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
@@ -67,7 +68,14 @@ class FakeArtistRepo:
async def get_or_create(self, name: str) -> Artist: async def get_or_create(self, name: str) -> Artist:
self.created.append(name) self.created.append(name)
now = dt.datetime.now(dt.UTC) now = dt.datetime.now(dt.UTC)
return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) return Artist(
id=uuid.uuid4(),
name=name,
source=None,
source_id=None,
created_at=now,
updated_at=now,
)
class FakeAlbumRepo: class FakeAlbumRepo:
@@ -88,6 +96,8 @@ class FakeAlbumRepo:
year=year, year=year,
cover_path=self._existing_cover, cover_path=self._existing_cover,
musicbrainz_id=musicbrainz_id, musicbrainz_id=musicbrainz_id,
source=None,
source_id=None,
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
+11 -1
View File
@@ -149,6 +149,7 @@ def _pending_track() -> Track:
metadata_status="pending", metadata_status="pending",
metadata_error=None, metadata_error=None,
enriched_at=None, enriched_at=None,
availability="local",
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )
@@ -180,7 +181,14 @@ class _FakeArtistRepo:
async def get_or_create(self, name: str) -> Artist: async def get_or_create(self, name: str) -> Artist:
self.created.append(name) self.created.append(name)
now = datetime.now(UTC) now = datetime.now(UTC)
return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) return Artist(
id=uuid.uuid4(),
name=name,
source=None,
source_id=None,
created_at=now,
updated_at=now,
)
@dataclass @dataclass
@@ -199,6 +207,8 @@ class _FakeAlbumRepo:
year=year, year=year,
cover_path=None, cover_path=None,
musicbrainz_id=musicbrainz_id, musicbrainz_id=musicbrainz_id,
source=None,
source_id=None,
created_at=now, created_at=now,
updated_at=now, updated_at=now,
) )