From 58b98ab5ed2081106242fb896b3407f37bfa43a5 Mon Sep 17 00:00:00 2001 From: Senko-san Date: Sun, 14 Jun 2026 17:51:43 +0300 Subject: [PATCH] =?UTF-8?q?feat(library):=20lazy=20materialization=20found?= =?UTF-8?q?ation=20for=20remote=20tracks=20(=C2=A7Phase1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds nullable storage fields + availability column on tracks, remote source/source_id identity on albums/artists, TrackRepository.materialize() and get_or_create_remote() repos — groundwork for on-demand YTM library (placeholders saved without audio, materialized in-place on first play). Co-Authored-By: Claude Sonnet 4.6 --- ...remote_placeholders_track_availability_.py | 65 ++++++++ app/api/rest/media.py | 2 +- app/api/rest/serializers.py | 4 +- app/api/schemas/track.py | 5 +- app/api/v1/tracks.py | 4 +- app/application/metadata_service.py | 12 +- app/application/streaming_service.py | 9 +- app/domain/entities/album.py | 2 + app/domain/entities/track.py | 9 +- app/domain/ports.py | 39 ++++- app/infrastructure/db/models/album.py | 12 +- app/infrastructure/db/models/artist.py | 12 +- app/infrastructure/db/models/enums.py | 9 + app/infrastructure/db/models/track.py | 17 +- .../db/repositories/album_repository.py | 54 ++++++ .../db/repositories/artist_repository.py | 28 ++++ .../db/repositories/like_repository.py | 1 + .../db/repositories/playlist_repository.py | 1 + .../db/repositories/track_repository.py | 36 +++- tests/test_download_service.py | 11 +- tests/test_import_service.py | 11 +- tests/test_lazy_materialization_repos.py | 156 ++++++++++++++++++ tests/test_metadata_service.py | 12 +- tests/test_real_file_enrichment.py | 12 +- 24 files changed, 492 insertions(+), 31 deletions(-) create mode 100644 alembic/versions/20260614_1125-dc126696f5a6_remote_placeholders_track_availability_.py create mode 100644 tests/test_lazy_materialization_repos.py diff --git a/alembic/versions/20260614_1125-dc126696f5a6_remote_placeholders_track_availability_.py b/alembic/versions/20260614_1125-dc126696f5a6_remote_placeholders_track_availability_.py new file mode 100644 index 0000000..72e3ed3 --- /dev/null +++ b/alembic/versions/20260614_1125-dc126696f5a6_remote_placeholders_track_availability_.py @@ -0,0 +1,65 @@ +"""remote placeholders: track availability, album/artist remote ids + +Revision ID: dc126696f5a6 +Revises: 20260614_dl_track_id +Create Date: 2026-06-14 11:25:30.643588 +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = 'dc126696f5a6' +down_revision: str | None = '20260614_dl_track_id' +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('albums', sa.Column('source', sa.String(length=32), nullable=True)) + op.add_column('albums', sa.Column('source_id', sa.String(length=512), nullable=True)) + op.create_unique_constraint('uq_albums_source_source_id', 'albums', ['source', 'source_id']) + op.add_column('artists', sa.Column('source', sa.String(length=32), nullable=True)) + op.add_column('artists', sa.Column('source_id', sa.String(length=512), nullable=True)) + op.create_unique_constraint('uq_artists_source_source_id', 'artists', ['source', 'source_id']) + op.add_column( + 'tracks', + sa.Column('availability', sa.String(length=16), nullable=False, server_default='local'), + ) + op.alter_column('tracks', 'availability', server_default=None) + op.alter_column('tracks', 'storage_uri', + existing_type=sa.VARCHAR(length=2048), + nullable=True) + op.alter_column('tracks', 'file_format', + existing_type=sa.VARCHAR(length=32), + nullable=True) + op.alter_column('tracks', 'file_size', + existing_type=sa.INTEGER(), + nullable=True) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column('tracks', 'file_size', + existing_type=sa.INTEGER(), + nullable=False) + op.alter_column('tracks', 'file_format', + existing_type=sa.VARCHAR(length=32), + nullable=False) + op.alter_column('tracks', 'storage_uri', + existing_type=sa.VARCHAR(length=2048), + nullable=False) + op.drop_column('tracks', 'availability') + op.drop_constraint('uq_artists_source_source_id', 'artists', type_='unique') + op.drop_column('artists', 'source_id') + op.drop_column('artists', 'source') + op.drop_constraint('uq_albums_source_source_id', 'albums', type_='unique') + op.drop_column('albums', 'source_id') + op.drop_column('albums', 'source') + # ### end Alembic commands ### diff --git a/app/api/rest/media.py b/app/api/rest/media.py index ba439c5..db8df10 100644 --- a/app/api/rest/media.py +++ b/app/api/rest/media.py @@ -65,7 +65,7 @@ async def download( if track is None: raise NotFoundError("Song not found.") result = await service.open_stream(track_id, None) - filename = f"{track.title}.{track.file_format}" + filename = f"{track.title}.{track.file_format or 'bin'}" headers = { "Content-Length": str(result.content_length), "Content-Disposition": f'attachment; filename="{filename}"', diff --git a/app/api/rest/serializers.py b/app/api/rest/serializers.py index 5b6f99e..c6b7aaa 100644 --- a/app/api/rest/serializers.py +++ b/app/api/rest/serializers.py @@ -80,8 +80,8 @@ def song_dict( "albumId": encode_album(track.album_id) if track.album_id is not None else None, "artistId": encode_artist(track.artist_id), "coverArt": cover, - "size": track.file_size, - "contentType": content_type_for(track.file_format), + "size": track.file_size or 0, + "contentType": content_type_for(track.file_format or ""), "suffix": track.file_format, "duration": track.duration_seconds, "year": track.year, diff --git a/app/api/schemas/track.py b/app/api/schemas/track.py index 9a79dd8..b438af6 100644 --- a/app/api/schemas/track.py +++ b/app/api/schemas/track.py @@ -14,14 +14,15 @@ class TrackOut(BaseModel): album_id: uuid.UUID | None album_title: str | None duration_seconds: int | None - file_format: str - file_size: int + file_format: str | None + file_size: int | None genre: str | None year: int | None track_number: int | None metadata_status: str metadata_error: str | None enriched_at: dt.datetime | None + availability: str source: str has_cover: bool created_at: dt.datetime diff --git a/app/api/v1/tracks.py b/app/api/v1/tracks.py index f68d50e..427f64c 100644 --- a/app/api/v1/tracks.py +++ b/app/api/v1/tracks.py @@ -54,6 +54,7 @@ async def _build_track_out( metadata_status=t.metadata_status, metadata_error=t.metadata_error, enriched_at=t.enriched_at, + availability=t.availability, source=t.source, has_cover=bool(t.album_id and albums.get(t.album_id) and albums[t.album_id].cover_path), created_at=t.created_at, @@ -155,7 +156,8 @@ async def delete_track( if track is None: raise NotFoundError(f"Track {track_id} not found.") await track_repo.delete(track_id) - await storage.delete(track.storage_uri) + if track.storage_uri is not None: + await storage.delete(track.storage_uri) return Response(status_code=204) diff --git a/app/application/metadata_service.py b/app/application/metadata_service.py index 9e2db87..90915a7 100644 --- a/app/application/metadata_service.py +++ b/app/application/metadata_service.py @@ -79,9 +79,13 @@ class MetadataEnrichmentService: if track.metadata_status == "manual": log.info("enrich_skip_manual", track_id=str(track_id)) return EnrichmentResult(track_id=track_id, status="skipped") + storage_uri = track.storage_uri + if storage_uri is None: + log.info("enrich_skip_remote", track_id=str(track_id)) + return EnrichmentResult(track_id=track_id, status="skipped") - tags = await self._read_local(track.storage_uri) - match = await self._identify(track.storage_uri) + tags = await self._read_local(storage_uri) + match = await self._identify(storage_uri) # Merge order is tag-first by default — embedded tags fix the common # well-tagged offline case. But a *high-confidence* AcoustID match is the @@ -125,7 +129,7 @@ class MetadataEnrichmentService: if album is not None: await self._resolve_cover( album, - storage_uri=track.storage_uri, + storage_uri=storage_uri, release_group_mbid=match.release_group_mbid if match else None, ) @@ -175,6 +179,8 @@ class MetadataEnrichmentService: return [] if not self._acoustid.is_available() or not self._fingerprinter.is_available(): return [] + if track.storage_uri is None: + return [] try: async with self._storage.as_local_path(track.storage_uri) as path: fingerprint = await self._fingerprinter.calculate(path) diff --git a/app/application/streaming_service.py b/app/application/streaming_service.py index 5ee9d10..85c327b 100644 --- a/app/application/streaming_service.py +++ b/app/application/streaming_service.py @@ -72,16 +72,19 @@ class StreamingService: track = await self._tracks.get_by_id(track_id) if track is None: raise NotFoundError("Track not found.") + storage_uri = track.storage_uri + if storage_uri is None: + raise NotFoundError("Track is not yet downloaded.") - stat = await self._storage.stat(track.storage_uri) + stat = await self._storage.stat(storage_uri) total_size = stat.size content_type = stat.content_type or _FORMAT_CONTENT_TYPE.get( - track.file_format.lower(), "application/octet-stream" + (track.file_format or "").lower(), "application/octet-stream" ) start, end, is_partial = _parse_range(range_header, total_size) - stream, _ = await self._storage.open_range(track.storage_uri, start, end) + stream, _ = await self._storage.open_range(storage_uri, start, end) actual_end = end if end is not None else total_size - 1 content_length = actual_end - start + 1 diff --git a/app/domain/entities/album.py b/app/domain/entities/album.py index af6ab03..af75824 100644 --- a/app/domain/entities/album.py +++ b/app/domain/entities/album.py @@ -13,5 +13,7 @@ class Album: year: int | None cover_path: str | None musicbrainz_id: str | None + source: str | None + source_id: str | None created_at: dt.datetime updated_at: dt.datetime diff --git a/app/domain/entities/track.py b/app/domain/entities/track.py index 99eff3f..28caa57 100644 --- a/app/domain/entities/track.py +++ b/app/domain/entities/track.py @@ -9,6 +9,8 @@ from dataclasses import dataclass class Artist: id: uuid.UUID name: str + source: str | None + source_id: str | None created_at: dt.datetime updated_at: dt.datetime @@ -19,9 +21,9 @@ class Track: title: str artist_id: uuid.UUID album_id: uuid.UUID | None - storage_uri: str - file_format: str - file_size: int + storage_uri: str | None + file_format: str | None + file_size: int | None source: str source_id: str duration_seconds: int | None @@ -31,5 +33,6 @@ class Track: metadata_status: str metadata_error: str | None enriched_at: dt.datetime | None + availability: str created_at: dt.datetime updated_at: dt.datetime diff --git a/app/domain/ports.py b/app/domain/ports.py index 47c6f9b..c73b85a 100644 --- a/app/domain/ports.py +++ b/app/domain/ports.py @@ -114,6 +114,11 @@ class FileStorage(Protocol): class ArtistRepository(Protocol): async def get_or_create(self, name: str) -> Artist: ... + async def get_or_create_remote(self, *, name: str, source: str, source_id: str) -> Artist: + """Resolve/create an artist bound to a remote ``(source, source_id)`` + (lazy materialization save-to-library).""" + ... + async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None: ... async def get_many(self, ids: list[uuid.UUID]) -> list[Artist]: ... async def list(self, *, q: str | None, limit: int, offset: int) -> list[Artist]: ... @@ -131,14 +136,28 @@ class TrackRepository(Protocol): id: uuid.UUID, title: str, artist_id: uuid.UUID, - storage_uri: str, - file_format: str, - file_size: int, + storage_uri: str | None, + file_format: str | None, + file_size: int | None, source: str, source_id: str, metadata_status: str, added_by: uuid.UUID | None, + availability: str = ..., ) -> Track: ... + async def materialize( + self, + track_id: uuid.UUID, + *, + storage_uri: str, + file_format: str, + file_size: int, + bitrate: int | None, + ) -> Track: + """Fill in a remote placeholder's audio fields after a download + (lazy materialization), flipping ``availability`` to ``local``.""" + ... + async def delete(self, track_id: uuid.UUID) -> None: ... # genres / library_stats must come before ``list`` — the method named # ``list`` shadows the builtin in later annotations (same pattern as @@ -212,6 +231,20 @@ class AlbumRepository(Protocol): year: int | None, musicbrainz_id: str | None, ) -> Album: ... + async def get_or_create_remote( + self, + *, + title: str, + artist_id: uuid.UUID, + year: int | None, + musicbrainz_id: str | None, + source: str, + source_id: str, + ) -> Album: + """Resolve/create an album bound to a remote ``(source, source_id)`` + (lazy materialization save-to-library).""" + ... + async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: ... async def get_by_id(self, album_id: uuid.UUID) -> Album | None: ... async def get_many(self, ids: list[uuid.UUID]) -> list[Album]: ... diff --git a/app/infrastructure/db/models/album.py b/app/infrastructure/db/models/album.py index d1960d5..3aa84ff 100644 --- a/app/infrastructure/db/models/album.py +++ b/app/infrastructure/db/models/album.py @@ -2,7 +2,7 @@ import uuid -from sqlalchemy import ForeignKey, Integer, String +from sqlalchemy import ForeignKey, Integer, String, UniqueConstraint from sqlalchemy.orm import Mapped, mapped_column from app.infrastructure.db.base import Base @@ -11,6 +11,12 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base): __tablename__ = "albums" + __table_args__ = ( + # Binds a remote (browsable) album to its local row for re-browse/save + # dedup. Multiple NULLs are allowed by Postgres, so locally-created + # albums (source/source_id both NULL) never collide on this. + UniqueConstraint("source", "source_id", name="uq_albums_source_source_id"), + ) title: Mapped[str] = mapped_column(String(1024), index=True, nullable=False) artist_id: Mapped[uuid.UUID] = mapped_column( @@ -21,3 +27,7 @@ class AlbumModel(UUIDPrimaryKeyMixin, TimestampMixin, Base): year: Mapped[int | None] = mapped_column(Integer, nullable=True) cover_path: Mapped[str | None] = mapped_column(String(1024), nullable=True) musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True) + + # -- remote identity (lazy materialization) -------------------------- + source: Mapped[str | None] = mapped_column(String(32), nullable=True) + source_id: Mapped[str | None] = mapped_column(String(512), nullable=True) diff --git a/app/infrastructure/db/models/artist.py b/app/infrastructure/db/models/artist.py index 4a706e7..a7bef5a 100644 --- a/app/infrastructure/db/models/artist.py +++ b/app/infrastructure/db/models/artist.py @@ -1,6 +1,6 @@ """ORM model for artists.""" -from sqlalchemy import String +from sqlalchemy import String, UniqueConstraint from sqlalchemy.orm import Mapped, mapped_column from app.infrastructure.db.base import Base @@ -9,6 +9,16 @@ from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMi class ArtistModel(UUIDPrimaryKeyMixin, TimestampMixin, Base): __tablename__ = "artists" + __table_args__ = ( + # Binds a remote (browsable) artist to its local row for re-browse/save + # dedup. Multiple NULLs are allowed by Postgres, so locally-created + # artists (source/source_id both NULL) never collide on this. + UniqueConstraint("source", "source_id", name="uq_artists_source_source_id"), + ) name: Mapped[str] = mapped_column(String(512), index=True, nullable=False) musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True) + + # -- remote identity (lazy materialization) -------------------------- + source: Mapped[str | None] = mapped_column(String(32), nullable=True) + source_id: Mapped[str | None] = mapped_column(String(512), nullable=True) diff --git a/app/infrastructure/db/models/enums.py b/app/infrastructure/db/models/enums.py index 4a5ef0c..50e9f93 100644 --- a/app/infrastructure/db/models/enums.py +++ b/app/infrastructure/db/models/enums.py @@ -64,3 +64,12 @@ class LyricsStatus(enum.StrEnum): FOUND = "found" NOT_FOUND = "not_found" PENDING = "pending" + + +class TrackAvailability(enum.StrEnum): + """Whether a track's audio is on local storage or still a remote placeholder + (plan: lazy materialization). ``remote`` tracks have ``storage_uri = NULL`` + until ``TrackRepository.materialize`` fills it in.""" + + LOCAL = "local" + REMOTE = "remote" diff --git a/app/infrastructure/db/models/track.py b/app/infrastructure/db/models/track.py index 0cfced7..000105d 100644 --- a/app/infrastructure/db/models/track.py +++ b/app/infrastructure/db/models/track.py @@ -13,7 +13,7 @@ from sqlalchemy import DateTime, ForeignKey, Integer, String, UniqueConstraint from sqlalchemy.orm import Mapped, mapped_column from app.infrastructure.db.base import Base -from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy +from app.infrastructure.db.models.enums import MetadataStatus, StoragePolicy, TrackAvailability from app.infrastructure.db.models.mixins import TimestampMixin, UUIDPrimaryKeyMixin @@ -41,11 +41,20 @@ class TrackModel(UUIDPrimaryKeyMixin, TimestampMixin, Base): year: Mapped[int | None] = mapped_column(Integer, nullable=True) # -- file (original, stored as-is) ----------------------------------- - storage_uri: Mapped[str] = mapped_column(String(2048), nullable=False) - file_format: Mapped[str] = mapped_column(String(32), nullable=False) - file_size: Mapped[int] = mapped_column(Integer, nullable=False) + # NULL on a remote placeholder (not yet materialized) — see ``availability``. + storage_uri: Mapped[str | None] = mapped_column(String(2048), nullable=True) + file_format: Mapped[str | None] = mapped_column(String(32), nullable=True) + file_size: Mapped[int | None] = mapped_column(Integer, nullable=True) bitrate: Mapped[int | None] = mapped_column(Integer, nullable=True) + # ``remote`` = placeholder with no local audio yet; materialize() flips this + # to ``local`` once the file is downloaded and ``storage_uri`` is filled in. + availability: Mapped[str] = mapped_column( + String(16), + nullable=False, + default=TrackAvailability.LOCAL.value, + ) + # -- dedup / external ids -------------------------------------------- acoustid_fingerprint: Mapped[str | None] = mapped_column(String(64), index=True, nullable=True) musicbrainz_id: Mapped[str | None] = mapped_column(String(36), index=True, nullable=True) diff --git a/app/infrastructure/db/repositories/album_repository.py b/app/infrastructure/db/repositories/album_repository.py index f34e980..05e5308 100644 --- a/app/infrastructure/db/repositories/album_repository.py +++ b/app/infrastructure/db/repositories/album_repository.py @@ -18,6 +18,8 @@ def _to_entity(row: AlbumModel) -> Album: year=row.year, cover_path=row.cover_path, musicbrainz_id=row.musicbrainz_id, + source=row.source, + source_id=row.source_id, created_at=row.created_at, updated_at=row.updated_at, ) @@ -63,6 +65,58 @@ class SqlAlchemyAlbumRepository: await self._session.refresh(row) return _to_entity(row) + async def get_or_create_remote( + self, + *, + title: str, + artist_id: uuid.UUID, + year: int | None, + musicbrainz_id: str | None, + source: str, + source_id: str, + ) -> Album: + """Resolve an album by ``(source, source_id)`` first (re-browse/save + dedup), falling back to ``(title, artist_id)`` and gap-filling the + remote ids onto an existing row, else creating a new remote-bound row.""" + row = ( + await self._session.execute( + select(AlbumModel).where( + AlbumModel.source == source, + AlbumModel.source_id == source_id, + ) + ) + ).scalar_one_or_none() + if row is None: + row = ( + await self._session.execute( + select(AlbumModel).where( + AlbumModel.title == title, + AlbumModel.artist_id == artist_id, + ) + ) + ).scalar_one_or_none() + if row is None: + row = AlbumModel( + title=title, + artist_id=artist_id, + year=year, + musicbrainz_id=musicbrainz_id, + source=source, + source_id=source_id, + ) + self._session.add(row) + else: + if row.year is None and year is not None: + row.year = year + if row.musicbrainz_id is None and musicbrainz_id is not None: + row.musicbrainz_id = musicbrainz_id + if row.source is None and row.source_id is None: + row.source = source + row.source_id = source_id + await self._session.flush() + await self._session.refresh(row) + return _to_entity(row) + async def set_cover_path(self, album_id: uuid.UUID, cover_path: str) -> None: row = await self._session.get(AlbumModel, album_id) if row is not None: diff --git a/app/infrastructure/db/repositories/artist_repository.py b/app/infrastructure/db/repositories/artist_repository.py index 4d42494..0f810e6 100644 --- a/app/infrastructure/db/repositories/artist_repository.py +++ b/app/infrastructure/db/repositories/artist_repository.py @@ -15,6 +15,8 @@ def _to_entity(row: ArtistModel) -> Artist: return Artist( id=row.id, name=row.name, + source=row.source, + source_id=row.source_id, created_at=row.created_at, updated_at=row.updated_at, ) @@ -35,6 +37,32 @@ class SqlAlchemyArtistRepository: await self._session.refresh(row) return _to_entity(row) + async def get_or_create_remote(self, *, name: str, source: str, source_id: str) -> Artist: + """Resolve an artist by ``(source, source_id)`` first (re-browse/save + dedup), falling back to ``name`` and gap-filling the remote ids onto an + existing row, else creating a new remote-bound row.""" + row = ( + await self._session.execute( + select(ArtistModel).where( + ArtistModel.source == source, + ArtistModel.source_id == source_id, + ) + ) + ).scalar_one_or_none() + if row is None: + row = ( + await self._session.execute(select(ArtistModel).where(ArtistModel.name == name)) + ).scalar_one_or_none() + if row is None: + row = ArtistModel(name=name, source=source, source_id=source_id) + self._session.add(row) + elif row.source is None and row.source_id is None: + row.source = source + row.source_id = source_id + await self._session.flush() + await self._session.refresh(row) + return _to_entity(row) + async def get_by_id(self, artist_id: uuid.UUID) -> Artist | None: row = await self._session.get(ArtistModel, artist_id) return _to_entity(row) if row is not None else None diff --git a/app/infrastructure/db/repositories/like_repository.py b/app/infrastructure/db/repositories/like_repository.py index 5d0adae..2e50386 100644 --- a/app/infrastructure/db/repositories/like_repository.py +++ b/app/infrastructure/db/repositories/like_repository.py @@ -42,6 +42,7 @@ def _track_to_entity(row: TrackModel) -> Track: metadata_status=row.metadata_status, metadata_error=row.metadata_error, enriched_at=row.enriched_at, + availability=row.availability, created_at=row.created_at, updated_at=row.updated_at, ) diff --git a/app/infrastructure/db/repositories/playlist_repository.py b/app/infrastructure/db/repositories/playlist_repository.py index 014a889..af01f9c 100644 --- a/app/infrastructure/db/repositories/playlist_repository.py +++ b/app/infrastructure/db/repositories/playlist_repository.py @@ -41,6 +41,7 @@ def _track_to_entity(row: TrackModel) -> Track: metadata_status=row.metadata_status, metadata_error=row.metadata_error, enriched_at=row.enriched_at, + availability=row.availability, created_at=row.created_at, updated_at=row.updated_at, ) diff --git a/app/infrastructure/db/repositories/track_repository.py b/app/infrastructure/db/repositories/track_repository.py index d76c1e6..bf076ef 100644 --- a/app/infrastructure/db/repositories/track_repository.py +++ b/app/infrastructure/db/repositories/track_repository.py @@ -10,6 +10,7 @@ from app.domain.entities.storage import FormatBreakdown, LibraryStats from app.domain.entities.track import Track from app.domain.errors import NotFoundError from app.infrastructure.db.models.artist import ArtistModel +from app.infrastructure.db.models.enums import TrackAvailability from app.infrastructure.db.models.track import TrackModel @@ -31,6 +32,7 @@ def _to_entity(row: TrackModel) -> Track: metadata_status=row.metadata_status, metadata_error=row.metadata_error, enriched_at=row.enriched_at, + availability=row.availability, created_at=row.created_at, updated_at=row.updated_at, ) @@ -61,13 +63,14 @@ class SqlAlchemyTrackRepository: id: uuid.UUID, title: str, artist_id: uuid.UUID, - storage_uri: str, - file_format: str, - file_size: int, + storage_uri: str | None, + file_format: str | None, + file_size: int | None, source: str, source_id: str, metadata_status: str, added_by: uuid.UUID | None, + availability: str = TrackAvailability.LOCAL.value, ) -> Track: row = TrackModel( id=id, @@ -80,12 +83,38 @@ class SqlAlchemyTrackRepository: source_id=source_id, metadata_status=metadata_status, added_by=added_by, + availability=availability, ) self._session.add(row) await self._session.flush() await self._session.refresh(row) return _to_entity(row) + async def materialize( + self, + track_id: uuid.UUID, + *, + storage_uri: str, + file_format: str, + file_size: int, + bitrate: int | None, + ) -> Track: + """Fill in a remote placeholder's audio fields after a download (lazy + materialization). ``track.id`` is unchanged, so likes/playlists/queue + entries that already reference it keep working.""" + row = await self._session.get(TrackModel, track_id) + if row is None: + raise NotFoundError(f"Track {track_id} not found.") + row.storage_uri = storage_uri + row.file_format = file_format + row.file_size = file_size + if bitrate is not None: + row.bitrate = bitrate + row.availability = TrackAvailability.LOCAL.value + await self._session.flush() + await self._session.refresh(row) + return _to_entity(row) + async def delete(self, track_id: uuid.UUID) -> None: row = await self._session.get(TrackModel, track_id) if row is not None: @@ -130,6 +159,7 @@ class SqlAlchemyTrackRepository: func.count(TrackModel.id), func.coalesce(func.sum(TrackModel.file_size), 0), ) + .where(TrackModel.file_format.is_not(None)) .group_by(TrackModel.file_format) .order_by(func.sum(TrackModel.file_size).desc()) ) diff --git a/tests/test_download_service.py b/tests/test_download_service.py index 85a9854..805dbd6 100644 --- a/tests/test_download_service.py +++ b/tests/test_download_service.py @@ -16,7 +16,14 @@ pytestmark = pytest.mark.asyncio class FakeArtistRepo: async def get_or_create(self, name: str) -> Artist: now = dt.datetime.now(dt.UTC) - return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) + return Artist( + id=uuid.uuid4(), + name=name, + source=None, + source_id=None, + created_at=now, + updated_at=now, + ) class FakeTrackRepo: @@ -46,6 +53,7 @@ class FakeTrackRepo: metadata_status=str(kw["metadata_status"]), metadata_error=None, enriched_at=None, + availability="local", created_at=now, updated_at=now, ) @@ -140,6 +148,7 @@ def _track(source: str, source_id: str) -> Track: metadata_status="pending", metadata_error=None, enriched_at=None, + availability="local", created_at=now, updated_at=now, ) diff --git a/tests/test_import_service.py b/tests/test_import_service.py index ffbc13a..eb10a3f 100644 --- a/tests/test_import_service.py +++ b/tests/test_import_service.py @@ -20,7 +20,14 @@ class FakeArtistRepo: async def get_or_create(self, name: str) -> Artist: if name not in self._by_name: now = dt.datetime.now(dt.UTC) - self._by_name[name] = Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) + self._by_name[name] = Artist( + id=uuid.uuid4(), + name=name, + source=None, + source_id=None, + created_at=now, + updated_at=now, + ) return self._by_name[name] @@ -55,6 +62,7 @@ class FakeTrackRepo: metadata_status=str(kw["metadata_status"]), metadata_error=None, enriched_at=None, + availability="local", created_at=now, updated_at=now, ) @@ -139,6 +147,7 @@ async def test_dedup_skips_already_imported() -> None: metadata_status="pending", metadata_error=None, enriched_at=None, + availability="local", created_at=now, updated_at=now, ) diff --git a/tests/test_lazy_materialization_repos.py b/tests/test_lazy_materialization_repos.py new file mode 100644 index 0000000..ad5a516 --- /dev/null +++ b/tests/test_lazy_materialization_repos.py @@ -0,0 +1,156 @@ +"""Integration tests for the lazy-materialization foundation: +``TrackRepository.materialize`` and ``Album``/``ArtistRepository.get_or_create_remote``. + +Requires a reachable Postgres; skips otherwise (same pattern as +``test_upload_stream_api.py``). +""" + +import asyncio +import uuid +from collections.abc import AsyncIterator + +import pytest +from app.infrastructure.db import Base, dispose_engine, get_engine, session_scope +from app.infrastructure.db.models.enums import TrackAvailability +from app.infrastructure.db.repositories import ( + SqlAlchemyAlbumRepository, + SqlAlchemyArtistRepository, + SqlAlchemyTrackRepository, +) + +pytestmark = pytest.mark.asyncio + +_db_reachable_cache: bool | None = None + + +async def _db_reachable() -> bool: + global _db_reachable_cache + if _db_reachable_cache is not None: + return _db_reachable_cache + + from sqlalchemy import text + + try: + async with asyncio.timeout(3): + async with get_engine().connect() as conn: + await conn.execute(text("SELECT 1")) + _db_reachable_cache = True + except Exception: + _db_reachable_cache = False + return _db_reachable_cache + + +@pytest.fixture +async def db() -> AsyncIterator[None]: + if not await _db_reachable(): + pytest.skip("Postgres not reachable — integration test skipped.") + + async with get_engine().begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + await conn.run_sync(Base.metadata.create_all) + + yield None + + async with get_engine().begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + await dispose_engine() + + +async def test_placeholder_track_materializes_in_place(db: None) -> None: + """A remote placeholder (no storage) gets its audio fields filled in by + ``materialize`` without changing ``track.id`` — the stable content id that + likes/playlists/queue may already reference.""" + async with session_scope() as session: + artists = SqlAlchemyArtistRepository(session) + tracks = SqlAlchemyTrackRepository(session) + artist = await artists.get_or_create("Some Artist") + + track_id = uuid.uuid4() + placeholder = await tracks.add( + id=track_id, + title="Remote Track", + artist_id=artist.id, + storage_uri=None, + file_format=None, + file_size=None, + source="youtube", + source_id="abc123", + metadata_status="pending", + added_by=None, + availability=TrackAvailability.REMOTE.value, + ) + assert placeholder.availability == "remote" + assert placeholder.storage_uri is None + + materialized = await tracks.materialize( + track_id, + storage_uri="tracks/ab/abc123.m4a", + file_format="m4a", + file_size=12345, + bitrate=160, + ) + + assert materialized.id == track_id + assert materialized.availability == "local" + assert materialized.storage_uri == "tracks/ab/abc123.m4a" + assert materialized.file_format == "m4a" + assert materialized.file_size == 12345 + + +async def test_artist_get_or_create_remote_dedups_by_remote_id(db: None) -> None: + async with session_scope() as session: + artists = SqlAlchemyArtistRepository(session) + + first = await artists.get_or_create_remote( + name="Daft Punk", source="youtube", source_id="UCabc" + ) + again = await artists.get_or_create_remote( + name="Daft Punk (different display name)", source="youtube", source_id="UCabc" + ) + + assert first.id == again.id + assert again.source == "youtube" + assert again.source_id == "UCabc" + + +async def test_artist_get_or_create_remote_binds_existing_local_artist(db: None) -> None: + async with session_scope() as session: + artists = SqlAlchemyArtistRepository(session) + + local = await artists.get_or_create("Pink Floyd") + remote = await artists.get_or_create_remote( + name="Pink Floyd", source="youtube", source_id="UCxyz" + ) + + assert remote.id == local.id + assert remote.source == "youtube" + assert remote.source_id == "UCxyz" + + +async def test_album_get_or_create_remote_dedups_by_remote_id(db: None) -> None: + async with session_scope() as session: + artists = SqlAlchemyArtistRepository(session) + albums = SqlAlchemyAlbumRepository(session) + artist = await artists.get_or_create("Daft Punk") + + first = await albums.get_or_create_remote( + title="Discovery", + artist_id=artist.id, + year=2001, + musicbrainz_id=None, + source="youtube", + source_id="MPREb_abc", + ) + again = await albums.get_or_create_remote( + title="Discovery", + artist_id=artist.id, + year=None, + musicbrainz_id=None, + source="youtube", + source_id="MPREb_abc", + ) + + assert first.id == again.id + assert again.source == "youtube" + assert again.source_id == "MPREb_abc" + assert again.year == 2001 diff --git a/tests/test_metadata_service.py b/tests/test_metadata_service.py index 2d6d7aa..e8f602f 100644 --- a/tests/test_metadata_service.py +++ b/tests/test_metadata_service.py @@ -42,6 +42,7 @@ def _track(*, metadata_status: str = "pending", title: str = "raw-stem") -> Trac metadata_status=metadata_status, metadata_error=None, enriched_at=None, + availability="local", created_at=now, updated_at=now, ) @@ -67,7 +68,14 @@ class FakeArtistRepo: async def get_or_create(self, name: str) -> Artist: self.created.append(name) now = dt.datetime.now(dt.UTC) - return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) + return Artist( + id=uuid.uuid4(), + name=name, + source=None, + source_id=None, + created_at=now, + updated_at=now, + ) class FakeAlbumRepo: @@ -88,6 +96,8 @@ class FakeAlbumRepo: year=year, cover_path=self._existing_cover, musicbrainz_id=musicbrainz_id, + source=None, + source_id=None, created_at=now, updated_at=now, ) diff --git a/tests/test_real_file_enrichment.py b/tests/test_real_file_enrichment.py index 023738a..1b0a775 100644 --- a/tests/test_real_file_enrichment.py +++ b/tests/test_real_file_enrichment.py @@ -149,6 +149,7 @@ def _pending_track() -> Track: metadata_status="pending", metadata_error=None, enriched_at=None, + availability="local", created_at=now, updated_at=now, ) @@ -180,7 +181,14 @@ class _FakeArtistRepo: async def get_or_create(self, name: str) -> Artist: self.created.append(name) now = datetime.now(UTC) - return Artist(id=uuid.uuid4(), name=name, created_at=now, updated_at=now) + return Artist( + id=uuid.uuid4(), + name=name, + source=None, + source_id=None, + created_at=now, + updated_at=now, + ) @dataclass @@ -199,6 +207,8 @@ class _FakeAlbumRepo: year=year, cover_path=None, musicbrainz_id=musicbrainz_id, + source=None, + source_id=None, created_at=now, updated_at=now, )