please work please work ci please · teal.fm/teal@e65c5e1

+52

.dockerignore

···

··· 1 + # Rust build artifacts 2 + target/ 3 + **/target/ 4 + services/target/ 5 + apps/*/target/ 6 + 7 + # Node.js dependencies and build artifacts 8 + node_modules/ 9 + **/node_modules/ 10 + .turbo/ 11 + **/.turbo/ 12 + build/ 13 + dist/ 14 + .next/ 15 + 16 + # Development and cache files 17 + .git/ 18 + .gitignore 19 + **/.DS_Store 20 + *.log 21 + *.tmp 22 + *.temp 23 + 24 + # IDE and editor files 25 + .vscode/ 26 + .idea/ 27 + *.swp 28 + *.swo 29 + *~ 30 + 31 + # Environment and config files 32 + .env 33 + .env.local 34 + .env.*.local 35 + 36 + # Database files 37 + *.db 38 + *.sqlite 39 + *.sqlite3 40 + 41 + # Test coverage 42 + coverage/ 43 + **/coverage/ 44 + 45 + # Temporary files 46 + tmp/ 47 + temp/ 48 + 49 + # SQLx offline query cache 50 + # Include workspace-level cache for monorepo builds 51 + # Uncomment the line below if you want to force online compilation 52 + # .sqlx/

+4 -12

.github/workflows/amethyst.yml

··· 46 run: pnpm lex:gen-server 47 48 - name: Build web 49 - run: | 50 - cd apps/amethyst 51 - pnpm build:web 52 53 - name: Upload web build artifacts 54 uses: actions/upload-artifact@v4 ··· 84 run: npm install -g @expo/cli 85 86 - name: Build iOS 87 - run: | 88 - cd apps/amethyst 89 - pnpm build:ios 90 91 - name: Upload iOS build artifacts 92 uses: actions/upload-artifact@v4 ··· 118 run: pnpm lex:gen-server 119 120 - name: Type check 121 - run: | 122 - cd apps/amethyst 123 - npx tsc --noEmit 124 125 - name: Run tests 126 - run: | 127 - cd apps/amethyst 128 - pnpm test --watchAll=false

··· 46 run: pnpm lex:gen-server 47 48 - name: Build web 49 + run: pnpm turbo build:web --filter=@teal/amethyst 50 51 - name: Upload web build artifacts 52 uses: actions/upload-artifact@v4 ··· 82 run: npm install -g @expo/cli 83 84 - name: Build iOS 85 + run: pnpm turbo build:ios --filter=@teal/amethyst 86 87 - name: Upload iOS build artifacts 88 uses: actions/upload-artifact@v4 ··· 114 run: pnpm lex:gen-server 115 116 - name: Type check 117 + run: pnpm turbo check-types --filter=@teal/amethyst 118 119 - name: Run tests 120 + run: pnpm turbo test --filter=@teal/amethyst

+1 -1

.github/workflows/aqua.yml

··· 60 - name: Build and push Docker image 61 uses: docker/build-push-action@v5 62 with: 63 - context: ./apps/aqua 64 file: ./apps/aqua/Dockerfile 65 push: ${{ github.event_name != 'pull_request' }} 66 tags: ${{ steps.meta.outputs.tags }}

··· 60 - name: Build and push Docker image 61 uses: docker/build-push-action@v5 62 with: 63 + context: . 64 file: ./apps/aqua/Dockerfile 65 push: ${{ github.event_name != 'pull_request' }} 66 tags: ${{ steps.meta.outputs.tags }}

-16

.gitignore

··· 65 vendor/**/*.d.ts 66 vendor/**/dist/ 67 vendor/**/node_modules/ 68 - 69 - # lexicons directory structure 70 - !lexicons/ 71 - # Track our custom lexicons 72 - !lexicons/fm.teal.alpha/ 73 - !lexicons/fm.teal.alpha/**/*.json 74 - # Ignore symlinks to atproto lexicons (created during setup) 75 - lexicons/app 76 - lexicons/chat 77 - lexicons/com 78 - lexicons/tools 79 - # But ignore any generated files within lexicons 80 - lexicons/**/*.js 81 - lexicons/**/*.d.ts 82 - lexicons/**/dist/ 83 - lexicons/**/node_modules/

··· 65 vendor/**/*.d.ts 66 vendor/**/dist/ 67 vendor/**/node_modules/

+30 -66

Cargo.lock

··· 128 "chrono", 129 "clap", 130 "dotenvy", 131 - "iroh-car 0.4.0", 132 "redis", 133 "reqwest", 134 "serde", ··· 199 "dashmap", 200 "futures", 201 "ipld-core", 202 - "iroh-car 0.5.1", 203 "log", 204 "multihash 0.19.3", 205 "serde", ··· 372 ] 373 374 [[package]] 375 name = "backtrace" 376 version = "0.3.75" 377 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 552 "dotenvy", 553 "flume", 554 "futures", 555 - "iroh-car 0.4.0", 556 "libipld", 557 "metrics 0.23.1", 558 "metrics-exporter-prometheus", ··· 1875 1876 [[package]] 1877 name = "iroh-car" 1878 - version = "0.4.0" 1879 - source = "registry+https://github.com/rust-lang/crates.io-index" 1880 - checksum = "475a6f0ebd64c87ea011021c67f10b57930f6c286e0163807066bfb83553b1b6" 1881 - dependencies = [ 1882 - "anyhow", 1883 - "cid 0.10.1", 1884 - "futures", 1885 - "libipld", 1886 - "thiserror 1.0.69", 1887 - "tokio", 1888 - "unsigned-varint 0.7.2", 1889 - ] 1890 - 1891 - [[package]] 1892 - name = "iroh-car" 1893 version = "0.5.1" 1894 source = "registry+https://github.com/rust-lang/crates.io-index" 1895 checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" ··· 2497 ] 2498 2499 [[package]] 2500 name = "num-bigint-dig" 2501 version = "0.8.4" 2502 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2689 checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 2690 2691 [[package]] 2692 - name = "pin-project" 2693 - version = "1.1.10" 2694 - source = "registry+https://github.com/rust-lang/crates.io-index" 2695 - checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" 2696 - dependencies = [ 2697 - "pin-project-internal", 2698 - ] 2699 - 2700 - [[package]] 2701 - name = "pin-project-internal" 2702 - version = "1.1.10" 2703 - source = "registry+https://github.com/rust-lang/crates.io-index" 2704 - checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" 2705 - dependencies = [ 2706 - "proc-macro2", 2707 - 
"quote", 2708 - "syn 2.0.104", 2709 - ] 2710 - 2711 - [[package]] 2712 name = "pin-project-lite" 2713 version = "0.2.16" 2714 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2915 "once_cell", 2916 "socket2 0.5.10", 2917 "tracing", 2918 - "windows-sys 0.52.0", 2919 ] 2920 2921 [[package]] ··· 3012 3013 [[package]] 3014 name = "redis" 3015 - version = "0.24.0" 3016 source = "registry+https://github.com/rust-lang/crates.io-index" 3017 - checksum = "c580d9cbbe1d1b479e8d67cf9daf6a62c957e6846048408b80b43ac3f6af84cd" 3018 dependencies = [ 3019 "arc-swap", 3020 - "async-trait", 3021 "bytes", 3022 "combine", 3023 - "futures", 3024 "futures-util", 3025 "itoa", 3026 "percent-encoding", 3027 "pin-project-lite", 3028 "ryu", 3029 "sha1_smol", 3030 - "socket2 0.4.10", 3031 "tokio", 3032 - "tokio-retry", 3033 "tokio-util", 3034 "url", 3035 ] ··· 3605 3606 [[package]] 3607 name = "socket2" 3608 - version = "0.4.10" 3609 - source = "registry+https://github.com/rust-lang/crates.io-index" 3610 - checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" 3611 - dependencies = [ 3612 - "libc", 3613 - "winapi", 3614 - ] 3615 - 3616 - [[package]] 3617 - name = "socket2" 3618 version = "0.5.10" 3619 source = "registry+https://github.com/rust-lang/crates.io-index" 3620 checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" ··· 4171 checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" 4172 dependencies = [ 4173 "native-tls", 4174 - "tokio", 4175 - ] 4176 - 4177 - [[package]] 4178 - name = "tokio-retry" 4179 - version = "0.3.0" 4180 - source = "registry+https://github.com/rust-lang/crates.io-index" 4181 - checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" 4182 - dependencies = [ 4183 - "pin-project", 4184 - "rand 0.8.5", 4185 "tokio", 4186 ] 4187

··· 128 "chrono", 129 "clap", 130 "dotenvy", 131 + "iroh-car", 132 "redis", 133 "reqwest", 134 "serde", ··· 199 "dashmap", 200 "futures", 201 "ipld-core", 202 + "iroh-car", 203 "log", 204 "multihash 0.19.3", 205 "serde", ··· 372 ] 373 374 [[package]] 375 + name = "backon" 376 + version = "1.5.2" 377 + source = "registry+https://github.com/rust-lang/crates.io-index" 378 + checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d" 379 + dependencies = [ 380 + "fastrand", 381 + ] 382 + 383 + [[package]] 384 name = "backtrace" 385 version = "0.3.75" 386 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 561 "dotenvy", 562 "flume", 563 "futures", 564 + "iroh-car", 565 "libipld", 566 "metrics 0.23.1", 567 "metrics-exporter-prometheus", ··· 1884 1885 [[package]] 1886 name = "iroh-car" 1887 version = "0.5.1" 1888 source = "registry+https://github.com/rust-lang/crates.io-index" 1889 checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" ··· 2491 ] 2492 2493 [[package]] 2494 + name = "num-bigint" 2495 + version = "0.4.6" 2496 + source = "registry+https://github.com/rust-lang/crates.io-index" 2497 + checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" 2498 + dependencies = [ 2499 + "num-integer", 2500 + "num-traits", 2501 + ] 2502 + 2503 + [[package]] 2504 name = "num-bigint-dig" 2505 version = "0.8.4" 2506 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2693 checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 2694 2695 [[package]] 2696 name = "pin-project-lite" 2697 version = "0.2.16" 2698 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2899 "once_cell", 2900 "socket2 0.5.10", 2901 "tracing", 2902 + "windows-sys 0.59.0", 2903 ] 2904 2905 [[package]] ··· 2996 2997 [[package]] 2998 name = "redis" 2999 + version = "0.32.4" 3000 source = "registry+https://github.com/rust-lang/crates.io-index" 3001 + checksum = 
"e1f66bf4cac9733a23bcdf1e0e01effbaaad208567beba68be8f67e5f4af3ee1" 3002 dependencies = [ 3003 "arc-swap", 3004 + "backon", 3005 "bytes", 3006 + "cfg-if", 3007 "combine", 3008 + "futures-channel", 3009 "futures-util", 3010 "itoa", 3011 + "num-bigint", 3012 "percent-encoding", 3013 "pin-project-lite", 3014 "ryu", 3015 "sha1_smol", 3016 + "socket2 0.6.0", 3017 "tokio", 3018 "tokio-util", 3019 "url", 3020 ] ··· 3590 3591 [[package]] 3592 name = "socket2" 3593 version = "0.5.10" 3594 source = "registry+https://github.com/rust-lang/crates.io-index" 3595 checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" ··· 4146 checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" 4147 dependencies = [ 4148 "native-tls", 4149 "tokio", 4150 ] 4151

+2 -2

Cargo.toml

··· 34 rocketman = { path = "services/rocketman" } 35 36 # CAR and IPLD dependencies 37 - iroh-car = "0.4" 38 libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] } 39 cid = "0.11" 40 base64 = "0.22" 41 atmst = "0.0.1" 42 43 # Redis for job queues and caching 44 - redis = { version = "0.24", features = ["tokio-comp", "connection-manager"] }

··· 34 rocketman = { path = "services/rocketman" } 35 36 # CAR and IPLD dependencies 37 + iroh-car = "0.5" 38 libipld = { version = "0.16", features = ["dag-cbor", "dag-json"] } 39 cid = "0.11" 40 base64 = "0.22" 41 atmst = "0.0.1" 42 43 # Redis for job queues and caching 44 + redis = { version = "0.32", features = ["tokio-comp", "connection-manager"] }

+4 -4

apps/aqua/Dockerfile

··· 41 # Set up cross-compilation environment 42 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc 43 44 # Debug platform detection and run build 45 - RUN echo "DEBUG Before target.sh: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH" && \ 46 - . ./target.sh && \ 47 - touch src/main.rs && \ 48 echo "Building for $TARGET_ARCH" && \ 49 - cargo build --release --target $RUST_TARGET && \ 50 cp target/$RUST_TARGET/release/aqua target/aqua 51 52 FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc

··· 41 # Set up cross-compilation environment 42 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc 43 44 + 45 # Debug platform detection and run build 46 + RUN . ./target.sh && \ 47 + touch apps/aqua/src/main.rs && \ 48 echo "Building for $TARGET_ARCH" && \ 49 + cargo build --release --target $RUST_TARGET --package aqua && \ 50 cp target/$RUST_TARGET/release/aqua target/aqua 51 52 FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc

apps/aqua/target.sh target.sh

+1

lexicons/app

···

··· 1 + ../vendor/atproto/lexicons/app

+1

lexicons/chat

···

··· 1 + ../vendor/atproto/lexicons/chat

+1

lexicons/com

···

··· 1 + ../vendor/atproto/lexicons/com

+1

lexicons/tools

···

··· 1 + ../vendor/atproto/lexicons/tools

+8 -2

services/cadet/Dockerfile

··· 41 # Set up cross-compilation environment 42 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc 43 44 # Debug platform detection and run build 45 RUN echo "DEBUG Before target.sh: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH" && \ 46 . ./target.sh && \ 47 - touch src/main.rs && \ 48 echo "Building for $TARGET_ARCH" && \ 49 - cargo build --release --target $RUST_TARGET && \ 50 cp target/$RUST_TARGET/release/cadet target/cadet 51 52 FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc

··· 41 # Set up cross-compilation environment 42 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc 43 44 + # Force SQLx to use offline mode with workspace cache 45 + ENV SQLX_OFFLINE=true 46 + 47 + # copy sqlx in 48 + COPY .sqlx ./services/cadet/.sqlx 49 + 50 # Debug platform detection and run build 51 RUN echo "DEBUG Before target.sh: TARGETPLATFORM=$TARGETPLATFORM TARGETARCH=$TARGETARCH" && \ 52 . ./target.sh && \ 53 + touch services/cadet/src/main.rs && \ 54 echo "Building for $TARGET_ARCH" && \ 55 + cargo build --release --target $RUST_TARGET --package cadet && \ 56 cp target/$RUST_TARGET/release/cadet target/cadet 57 58 FROM --platform=${TARGETARCH:-$BUILDPLATFORM} gcr.io/distroless/cc

-226

services/migrations/20241220000001_initial_schema.sql

··· 1 - -- Initial comprehensive schema for Teal music platform 2 - -- Based on services/cadet/sql/base.sql 3 - 4 - CREATE TABLE artists ( 5 - mbid UUID PRIMARY KEY, 6 - name TEXT NOT NULL, 7 - play_count INTEGER DEFAULT 0 8 - ); 9 - 10 - -- releases are synologous to 'albums' 11 - CREATE TABLE releases ( 12 - mbid UUID PRIMARY KEY, 13 - name TEXT NOT NULL, 14 - play_count INTEGER DEFAULT 0 15 - ); 16 - 17 - -- recordings are synologous to 'tracks' BUT tracks can be in multiple releases! 18 - CREATE TABLE recordings ( 19 - mbid UUID PRIMARY KEY, 20 - name TEXT NOT NULL, 21 - play_count INTEGER DEFAULT 0 22 - ); 23 - 24 - CREATE TABLE plays ( 25 - uri TEXT PRIMARY KEY, 26 - did TEXT NOT NULL, 27 - rkey TEXT NOT NULL, 28 - cid TEXT NOT NULL, 29 - isrc TEXT, 30 - duration INTEGER, 31 - track_name TEXT NOT NULL, 32 - played_time TIMESTAMP WITH TIME ZONE, 33 - processed_time TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 34 - release_mbid UUID, 35 - release_name TEXT, 36 - recording_mbid UUID, 37 - submission_client_agent TEXT, 38 - music_service_base_domain TEXT, 39 - origin_url TEXT, 40 - FOREIGN KEY (release_mbid) REFERENCES releases (mbid), 41 - FOREIGN KEY (recording_mbid) REFERENCES recordings (mbid) 42 - ); 43 - 44 - CREATE INDEX idx_plays_release_mbid ON plays (release_mbid); 45 - CREATE INDEX idx_plays_recording_mbid ON plays (recording_mbid); 46 - CREATE INDEX idx_plays_played_time ON plays (played_time); 47 - CREATE INDEX idx_plays_did ON plays (did); 48 - 49 - CREATE TABLE play_to_artists ( 50 - play_uri TEXT, -- references plays(uri) 51 - artist_mbid UUID REFERENCES artists (mbid), 52 - artist_name TEXT, -- storing here for ease of use when joining 53 - PRIMARY KEY (play_uri, artist_mbid), 54 - FOREIGN KEY (play_uri) REFERENCES plays (uri) 55 - ); 56 - 57 - CREATE INDEX idx_play_to_artists_artist ON play_to_artists (artist_mbid); 58 - 59 - -- Profiles table 60 - CREATE TABLE profiles ( 61 - did TEXT PRIMARY KEY, 62 - handle TEXT, 63 - display_name TEXT, 64 - 
description TEXT, 65 - description_facets JSONB, 66 - avatar TEXT, -- IPLD of the image, bafy... 67 - banner TEXT, 68 - created_at TIMESTAMP WITH TIME ZONE 69 - ); 70 - 71 - -- User featured items table 72 - CREATE TABLE featured_items ( 73 - did TEXT PRIMARY KEY, 74 - mbid TEXT NOT NULL, 75 - type TEXT NOT NULL 76 - ); 77 - 78 - -- Statii table (status records) 79 - CREATE TABLE statii ( 80 - uri TEXT PRIMARY KEY, 81 - did TEXT NOT NULL, 82 - rkey TEXT NOT NULL, 83 - cid TEXT NOT NULL, 84 - record JSONB NOT NULL, 85 - indexed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 86 - ); 87 - 88 - CREATE INDEX idx_statii_did_rkey ON statii (did, rkey); 89 - 90 - -- Materialized view for artists' play counts 91 - CREATE MATERIALIZED VIEW mv_artist_play_counts AS 92 - SELECT 93 - a.mbid AS artist_mbid, 94 - a.name AS artist_name, 95 - COUNT(p.uri) AS play_count 96 - FROM 97 - artists a 98 - LEFT JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 99 - LEFT JOIN plays p ON p.uri = pta.play_uri 100 - GROUP BY 101 - a.mbid, 102 - a.name; 103 - 104 - CREATE UNIQUE INDEX idx_mv_artist_play_counts ON mv_artist_play_counts (artist_mbid); 105 - 106 - -- Materialized view for releases' play counts 107 - CREATE MATERIALIZED VIEW mv_release_play_counts AS 108 - SELECT 109 - r.mbid AS release_mbid, 110 - r.name AS release_name, 111 - COUNT(p.uri) AS play_count 112 - FROM 113 - releases r 114 - LEFT JOIN plays p ON p.release_mbid = r.mbid 115 - GROUP BY 116 - r.mbid, 117 - r.name; 118 - 119 - CREATE UNIQUE INDEX idx_mv_release_play_counts ON mv_release_play_counts (release_mbid); 120 - 121 - -- Materialized view for recordings' play counts 122 - CREATE MATERIALIZED VIEW mv_recording_play_counts AS 123 - SELECT 124 - rec.mbid AS recording_mbid, 125 - rec.name AS recording_name, 126 - COUNT(p.uri) AS play_count 127 - FROM 128 - recordings rec 129 - LEFT JOIN plays p ON p.recording_mbid = rec.mbid 130 - GROUP BY 131 - rec.mbid, 132 - rec.name; 133 - 134 - CREATE UNIQUE INDEX 
idx_mv_recording_play_counts ON mv_recording_play_counts (recording_mbid); 135 - 136 - -- Global play count materialized view 137 - CREATE MATERIALIZED VIEW mv_global_play_count AS 138 - SELECT 139 - COUNT(uri) AS total_plays, 140 - COUNT(DISTINCT did) AS unique_listeners 141 - FROM plays; 142 - 143 - CREATE UNIQUE INDEX idx_mv_global_play_count ON mv_global_play_count(total_plays); 144 - 145 - -- Top artists in the last 30 days 146 - CREATE MATERIALIZED VIEW mv_top_artists_30days AS 147 - SELECT 148 - a.mbid AS artist_mbid, 149 - a.name AS artist_name, 150 - COUNT(p.uri) AS play_count 151 - FROM artists a 152 - INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 153 - INNER JOIN plays p ON p.uri = pta.play_uri 154 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 155 - GROUP BY a.mbid, a.name 156 - ORDER BY COUNT(p.uri) DESC; 157 - 158 - -- Top releases in the last 30 days 159 - CREATE MATERIALIZED VIEW mv_top_releases_30days AS 160 - SELECT 161 - r.mbid AS release_mbid, 162 - r.name AS release_name, 163 - COUNT(p.uri) AS play_count 164 - FROM releases r 165 - INNER JOIN plays p ON p.release_mbid = r.mbid 166 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 167 - GROUP BY r.mbid, r.name 168 - ORDER BY COUNT(p.uri) DESC; 169 - 170 - -- Top artists for user in the last 30 days 171 - CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS 172 - SELECT 173 - prof.did, 174 - a.mbid AS artist_mbid, 175 - a.name AS artist_name, 176 - COUNT(p.uri) AS play_count 177 - FROM artists a 178 - INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 179 - INNER JOIN plays p ON p.uri = pta.play_uri 180 - INNER JOIN profiles prof ON prof.did = p.did 181 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 182 - GROUP BY prof.did, a.mbid, a.name 183 - ORDER BY COUNT(p.uri) DESC; 184 - 185 - -- Top artists for user in the last 7 days 186 - CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS 187 - SELECT 188 - prof.did, 189 - a.mbid AS artist_mbid, 190 - a.name 
AS artist_name, 191 - COUNT(p.uri) AS play_count 192 - FROM artists a 193 - INNER JOIN play_to_artists pta ON a.mbid = pta.artist_mbid 194 - INNER JOIN plays p ON p.uri = pta.play_uri 195 - INNER JOIN profiles prof ON prof.did = p.did 196 - WHERE p.played_time >= NOW() - INTERVAL '7 days' 197 - GROUP BY prof.did, a.mbid, a.name 198 - ORDER BY COUNT(p.uri) DESC; 199 - 200 - -- Top releases for user in the last 30 days 201 - CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days AS 202 - SELECT 203 - prof.did, 204 - r.mbid AS release_mbid, 205 - r.name AS release_name, 206 - COUNT(p.uri) AS play_count 207 - FROM releases r 208 - INNER JOIN plays p ON p.release_mbid = r.mbid 209 - INNER JOIN profiles prof ON prof.did = p.did 210 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 211 - GROUP BY prof.did, r.mbid, r.name 212 - ORDER BY COUNT(p.uri) DESC; 213 - 214 - -- Top releases for user in the last 7 days 215 - CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days AS 216 - SELECT 217 - prof.did, 218 - r.mbid AS release_mbid, 219 - r.name AS release_name, 220 - COUNT(p.uri) AS play_count 221 - FROM releases r 222 - INNER JOIN plays p ON p.release_mbid = r.mbid 223 - INNER JOIN profiles prof ON prof.did = p.did 224 - WHERE p.played_time >= NOW() - INTERVAL '7 days' 225 - GROUP BY prof.did, r.mbid, r.name 226 - ORDER BY COUNT(p.uri) DESC;

···

-59

services/migrations/20241220000002_car_import_tables.sql

··· 1 - -- CAR import functionality tables 2 - -- For handling AT Protocol CAR file imports and processing 3 - 4 - -- Tracks uploaded CAR files that are queued for processing 5 - CREATE TABLE IF NOT EXISTS car_import_requests ( 6 - import_id TEXT PRIMARY KEY, 7 - car_data_base64 TEXT NOT NULL, 8 - status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed 9 - created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 10 - processed_at TIMESTAMP WITH TIME ZONE, 11 - error_message TEXT, 12 - file_size_bytes INTEGER, 13 - block_count INTEGER, 14 - extracted_records_count INTEGER DEFAULT 0 15 - ); 16 - 17 - CREATE INDEX idx_car_import_requests_status ON car_import_requests (status); 18 - CREATE INDEX idx_car_import_requests_created_at ON car_import_requests (created_at); 19 - 20 - -- Tracks raw IPLD blocks extracted from CAR files 21 - CREATE TABLE IF NOT EXISTS car_blocks ( 22 - cid TEXT PRIMARY KEY, 23 - import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 24 - block_data BYTEA NOT NULL, 25 - decoded_successfully BOOLEAN DEFAULT FALSE, 26 - collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc. 
27 - created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 28 - ); 29 - 30 - CREATE INDEX idx_car_blocks_import_id ON car_blocks (import_id); 31 - CREATE INDEX idx_car_blocks_collection_type ON car_blocks (collection_type); 32 - 33 - -- Tracks records extracted from CAR imports that were successfully processed 34 - CREATE TABLE IF NOT EXISTS car_extracted_records ( 35 - id SERIAL PRIMARY KEY, 36 - import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 37 - cid TEXT NOT NULL REFERENCES car_blocks(cid), 38 - collection_type TEXT NOT NULL, 39 - record_uri TEXT, -- AT URI if applicable (e.g., for play records) 40 - synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123') 41 - rkey TEXT, 42 - extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 43 - processing_notes TEXT 44 - ); 45 - 46 - CREATE INDEX idx_car_extracted_records_import_id ON car_extracted_records (import_id); 47 - CREATE INDEX idx_car_extracted_records_collection_type ON car_extracted_records (collection_type); 48 - CREATE INDEX idx_car_extracted_records_record_uri ON car_extracted_records (record_uri); 49 - 50 - -- Tracks import metadata and commit information 51 - CREATE TABLE IF NOT EXISTS car_import_metadata ( 52 - import_id TEXT NOT NULL REFERENCES car_import_requests(import_id), 53 - metadata_key TEXT NOT NULL, 54 - metadata_value JSONB NOT NULL, 55 - created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 56 - PRIMARY KEY (import_id, metadata_key) 57 - ); 58 - 59 - CREATE INDEX idx_car_import_metadata_key ON car_import_metadata (metadata_key);

···

-112

services/migrations/20241220000003_artists_without_mbids.sql

··· 1 - -- Migration to support artists without MusicBrainz IDs 2 - -- This allows the system to comply with the Teal lexicon where only trackName is required 3 - 4 - -- Add a field to plays table to store raw artist names for records without MBIDs 5 - ALTER TABLE plays ADD COLUMN artist_names_raw JSONB; 6 - 7 - -- Create a new artists table that doesn't require MBID as primary key 8 - CREATE TABLE artists_extended ( 9 - id SERIAL PRIMARY KEY, 10 - mbid UUID UNIQUE, -- Optional MusicBrainz ID 11 - name TEXT NOT NULL, 12 - name_normalized TEXT GENERATED ALWAYS AS (LOWER(TRIM(name))) STORED, 13 - play_count INTEGER DEFAULT 0, 14 - created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), 15 - updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() 16 - ); 17 - 18 - -- Create index for efficient lookups 19 - CREATE INDEX idx_artists_extended_mbid ON artists_extended (mbid) WHERE mbid IS NOT NULL; 20 - CREATE INDEX idx_artists_extended_name_normalized ON artists_extended (name_normalized); 21 - CREATE UNIQUE INDEX idx_artists_extended_name_unique ON artists_extended (name_normalized) WHERE mbid IS NULL; 22 - 23 - -- Create a new junction table that can handle both MBID and non-MBID artists 24 - CREATE TABLE play_to_artists_extended ( 25 - play_uri TEXT NOT NULL REFERENCES plays(uri), 26 - artist_id INTEGER NOT NULL REFERENCES artists_extended(id), 27 - artist_name TEXT NOT NULL, -- Denormalized for performance 28 - PRIMARY KEY (play_uri, artist_id) 29 - ); 30 - 31 - CREATE INDEX idx_play_to_artists_extended_artist ON play_to_artists_extended (artist_id); 32 - 33 - -- Migrate existing data from old tables to new structure 34 - INSERT INTO artists_extended (mbid, name, play_count) 35 - SELECT mbid, name, play_count FROM artists; 36 - 37 - INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name) 38 - SELECT 39 - pta.play_uri, 40 - ae.id, 41 - pta.artist_name 42 - FROM play_to_artists pta 43 - JOIN artists_extended ae ON ae.mbid = pta.artist_mbid; 44 - 45 - -- Update 
materialized views to use new structure 46 - DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts; 47 - CREATE MATERIALIZED VIEW mv_artist_play_counts AS 48 - SELECT 49 - ae.id AS artist_id, 50 - ae.mbid AS artist_mbid, 51 - ae.name AS artist_name, 52 - COUNT(p.uri) AS play_count 53 - FROM 54 - artists_extended ae 55 - LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 56 - LEFT JOIN plays p ON p.uri = ptae.play_uri 57 - GROUP BY 58 - ae.id, ae.mbid, ae.name; 59 - 60 - CREATE UNIQUE INDEX idx_mv_artist_play_counts_new ON mv_artist_play_counts (artist_id); 61 - 62 - -- Update other materialized views that reference artists 63 - DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_30days; 64 - CREATE MATERIALIZED VIEW mv_top_artists_30days AS 65 - SELECT 66 - ae.id AS artist_id, 67 - ae.mbid AS artist_mbid, 68 - ae.name AS artist_name, 69 - COUNT(p.uri) AS play_count 70 - FROM artists_extended ae 71 - INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 72 - INNER JOIN plays p ON p.uri = ptae.play_uri 73 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 74 - GROUP BY ae.id, ae.mbid, ae.name 75 - ORDER BY COUNT(p.uri) DESC; 76 - 77 - DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_30days; 78 - CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days AS 79 - SELECT 80 - prof.did, 81 - ae.id AS artist_id, 82 - ae.mbid AS artist_mbid, 83 - ae.name AS artist_name, 84 - COUNT(p.uri) AS play_count 85 - FROM artists_extended ae 86 - INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 87 - INNER JOIN plays p ON p.uri = ptae.play_uri 88 - INNER JOIN profiles prof ON prof.did = p.did 89 - WHERE p.played_time >= NOW() - INTERVAL '30 days' 90 - GROUP BY prof.did, ae.id, ae.mbid, ae.name 91 - ORDER BY COUNT(p.uri) DESC; 92 - 93 - DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_7days; 94 - CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days AS 95 - SELECT 96 - prof.did, 97 - ae.id AS artist_id, 98 - ae.mbid AS 
artist_mbid, 99 - ae.name AS artist_name, 100 - COUNT(p.uri) AS play_count 101 - FROM artists_extended ae 102 - INNER JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id 103 - INNER JOIN plays p ON p.uri = ptae.play_uri 104 - INNER JOIN profiles prof ON prof.did = p.did 105 - WHERE p.played_time >= NOW() - INTERVAL '7 days' 106 - GROUP BY prof.did, ae.id, ae.mbid, ae.name 107 - ORDER BY COUNT(p.uri) DESC; 108 - 109 - -- Comment explaining the migration strategy 110 - COMMENT ON TABLE artists_extended IS 'Extended artists table that supports both MusicBrainz and non-MusicBrainz artists. Uses serial ID as primary key with optional MBID.'; 111 - COMMENT ON TABLE play_to_artists_extended IS 'Junction table linking plays to artists using the new artists_extended table structure.'; 112 - COMMENT ON COLUMN plays.artist_names_raw IS 'Raw artist names as JSON array for plays without MusicBrainz data, used as fallback when artist relationships cannot be established.';

···

-76

services/migrations/20241220000004_synthetic_mbids.sql

··· 1 - -- Migration to support synthetic MBIDs for artists without MusicBrainz data 2 - -- This ensures all artists have some form of ID while maintaining uniqueness 3 - 4 - -- Enable UUID extension for v5 UUID generation 5 - CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; 6 - 7 - -- Add a column to track MBID type (musicbrainz, synthetic, unknown) 8 - ALTER TABLE artists_extended ADD COLUMN mbid_type TEXT DEFAULT 'unknown' NOT NULL; 9 - 10 - -- Add check constraint for valid MBID types 11 - ALTER TABLE artists_extended ADD CONSTRAINT chk_mbid_type 12 - CHECK (mbid_type IN ('musicbrainz', 'synthetic', 'unknown')); 13 - 14 - -- Update existing records to set proper MBID type 15 - UPDATE artists_extended SET mbid_type = 'musicbrainz' WHERE mbid IS NOT NULL; 16 - 17 - -- Drop the unique constraint on name_normalized for null MBIDs since we'll handle duplicates differently 18 - DROP INDEX IF EXISTS idx_artists_extended_name_unique; 19 - 20 - -- Add index for efficient querying by MBID type 21 - CREATE INDEX idx_artists_extended_mbid_type ON artists_extended (mbid_type); 22 - 23 - -- Create a view to easily work with different artist types 24 - CREATE VIEW artists_with_type AS 25 - SELECT 26 - id, 27 - mbid, 28 - name, 29 - mbid_type, 30 - play_count, 31 - created_at, 32 - updated_at, 33 - -- For synthetic MBIDs, we can show the source name used for generation 34 - CASE 35 - WHEN mbid_type = 'synthetic' THEN 'Generated from: ' || name 36 - WHEN mbid_type = 'musicbrainz' THEN 'MusicBrainz: ' || mbid::text 37 - ELSE 'No MBID available' 38 - END as mbid_info 39 - FROM artists_extended; 40 - 41 - -- Update materialized views to include MBID type information 42 - DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts; 43 - CREATE MATERIALIZED VIEW mv_artist_play_counts AS 44 - SELECT 45 - ae.id AS artist_id, 46 - ae.mbid AS artist_mbid, 47 - ae.name AS artist_name, 48 - ae.mbid_type, 49 - COUNT(p.uri) AS play_count 50 - FROM 51 - artists_extended ae 52 - LEFT JOIN 
play_to_artists_extended ptae ON ae.id = ptae.artist_id 53 - LEFT JOIN plays p ON p.uri = ptae.play_uri 54 - GROUP BY 55 - ae.id, ae.mbid, ae.name, ae.mbid_type; 56 - 57 - CREATE UNIQUE INDEX idx_mv_artist_play_counts_with_type ON mv_artist_play_counts (artist_id); 58 - 59 - -- Add comments explaining the synthetic MBID system 60 - COMMENT ON COLUMN artists_extended.mbid_type IS 'Type of MBID: musicbrainz (real), synthetic (generated), or unknown (legacy data)'; 61 - COMMENT ON COLUMN artists_extended.mbid IS 'MusicBrainz ID (for musicbrainz type) or synthetic UUID (for synthetic type)'; 62 - COMMENT ON VIEW artists_with_type IS 'View that provides human-readable information about artist MBID sources'; 63 - 64 - -- Add a function to generate synthetic MBIDs 65 - CREATE OR REPLACE FUNCTION generate_synthetic_mbid(artist_name TEXT) RETURNS UUID AS $$ 66 - DECLARE 67 - namespace_uuid UUID := '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; -- DNS namespace 68 - result_uuid UUID; 69 - BEGIN 70 - -- Generate deterministic UUID v5 based on artist name 71 - SELECT uuid_generate_v5(namespace_uuid, artist_name) INTO result_uuid; 72 - RETURN result_uuid; 73 - END; 74 - $$ LANGUAGE plpgsql IMMUTABLE; 75 - 76 - COMMENT ON FUNCTION generate_synthetic_mbid IS 'Generates a deterministic UUID v5 for artist names without MusicBrainz IDs';

···

-101

services/migrations/20241220000005_fuzzy_matching.sql

-- Fuzzy text matching for artist names, based on pg_trgm trigram similarity.

-- pg_trgm provides similarity() and the gin_trgm_ops operator class.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Trigram GIN indexes on the raw and normalized artist names.
CREATE INDEX idx_artists_extended_name_trgm
    ON artists_extended USING gin (name gin_trgm_ops);
CREATE INDEX idx_artists_extended_name_normalized_trgm
    ON artists_extended USING gin (name_normalized gin_trgm_ops);

-- calculate_artist_similarity(input_name, existing_name, input_album, existing_album)
--   Returns a similarity score for two artist names, optionally blended with
--   the similarity of two album names.
--   * Names are compared with similarity() after LOWER(TRIM(...)).
--   * If the names are equal once every character other than ASCII letters,
--     digits and whitespace is stripped, the name score is raised to at least 0.95.
--   * When both albums are non-NULL the result is 80% name score + 20% album
--     score; otherwise the name score is returned unchanged.
CREATE OR REPLACE FUNCTION calculate_artist_similarity(
    input_name TEXT,
    existing_name TEXT,
    input_album TEXT DEFAULT NULL,
    existing_album TEXT DEFAULT NULL
) RETURNS FLOAT AS $$
DECLARE
    name_similarity FLOAT;
    album_similarity FLOAT := 0.0;
    final_score FLOAT;
BEGIN
    -- Trigram similarity of the trimmed, lower-cased names
    name_similarity := similarity(LOWER(TRIM(input_name)), LOWER(TRIM(existing_name)));

    -- Names that differ only in punctuation/symbols count as near-exact matches
    IF LOWER(TRIM(regexp_replace(input_name, '[^a-zA-Z0-9\s]', '', 'g'))) =
       LOWER(TRIM(regexp_replace(existing_name, '[^a-zA-Z0-9\s]', '', 'g'))) THEN
        name_similarity := GREATEST(name_similarity, 0.95);
    END IF;

    -- Blend in album similarity only when both albums are supplied
    IF input_album IS NOT NULL AND existing_album IS NOT NULL THEN
        album_similarity := similarity(LOWER(TRIM(input_album)), LOWER(TRIM(existing_album)));
        -- Weight: 80% name, 20% album
        final_score := (name_similarity * 0.8) + (album_similarity * 0.2);
    ELSE
        final_score := name_similarity;
    END IF;

    RETURN final_score;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

-- Pairs of distinct artists whose names have a trigram similarity above 0.8,
-- limited to pairs where the query artist is synthetic or the match is a
-- MusicBrainz artist. match_action is the recommended follow-up for the pair.
CREATE VIEW fuzzy_artist_matches AS
SELECT DISTINCT
    ae1.id as query_artist_id,
    ae1.name as query_artist_name,
    ae1.mbid_type as query_mbid_type,
    ae2.id as match_artist_id,
    ae2.name as match_artist_name,
    ae2.mbid as match_mbid,
    ae2.mbid_type as match_mbid_type,
    similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as name_similarity,
    CASE
        WHEN ae2.mbid_type = 'musicbrainz' THEN 'upgrade_to_mb'
        -- NOTE(review): with the WHERE filter below, a row whose match is not
        -- 'musicbrainz' always has a 'synthetic' query artist, so this branch
        -- cannot be reached as written. Confirm the intended filter.
        WHEN ae1.mbid_type = 'musicbrainz' AND ae2.mbid_type = 'synthetic' THEN 'consolidate_to_mb'
        ELSE 'merge_synthetic'
    END as match_action
FROM artists_extended ae1
CROSS JOIN artists_extended ae2
WHERE ae1.id != ae2.id
  AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) > 0.8
  AND (
    ae1.mbid_type = 'synthetic' OR ae2.mbid_type = 'musicbrainz'
  );

-- Catalog comments
COMMENT ON EXTENSION pg_trgm IS 'Trigram extension for fuzzy text matching';
COMMENT ON INDEX idx_artists_extended_name_trgm IS 'GIN index for trigram similarity on artist names';
COMMENT ON FUNCTION calculate_artist_similarity IS 'Calculates similarity score between artists considering name and optional album context';
COMMENT ON VIEW fuzzy_artist_matches IS 'Shows potential artist matches with confidence scores and recommended actions';

-- suggest_artist_consolidations(min_similarity)
--   Returns the 'upgrade_to_mb' rows of fuzzy_artist_matches whose name
--   similarity is at least min_similarity (default 0.9), together with the
--   number of play links each side has in play_to_artists_extended.
--   Rows are ordered by similarity, then by the query artist's play count,
--   both descending.
CREATE OR REPLACE FUNCTION suggest_artist_consolidations(min_similarity FLOAT DEFAULT 0.9)
RETURNS TABLE(
    action TEXT,
    synthetic_artist TEXT,
    target_artist TEXT,
    similarity_score FLOAT,
    synthetic_plays INTEGER,
    target_plays INTEGER
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        fam.match_action as action,
        fam.query_artist_name as synthetic_artist,
        fam.match_artist_name as target_artist,
        -- similarity() yields REAL; RETURN QUERY requires the column type to
        -- match the declared FLOAT (double precision) exactly, so cast here.
        fam.name_similarity::FLOAT as similarity_score,
        (SELECT COUNT(*)::INTEGER
           FROM play_to_artists_extended link
          WHERE link.artist_id = fam.query_artist_id) as synthetic_plays,
        (SELECT COUNT(*)::INTEGER
           FROM play_to_artists_extended link
          WHERE link.artist_id = fam.match_artist_id) as target_plays
    FROM fuzzy_artist_matches fam
    WHERE fam.name_similarity >= min_similarity
      AND fam.match_action = 'upgrade_to_mb'
    ORDER BY fam.name_similarity DESC, synthetic_plays DESC;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION suggest_artist_consolidations IS 'Returns suggestions for consolidating synthetic artists with MusicBrainz artists based on similarity';

···

-138

services/migrations/20241220000006_discriminant_fields.sql

-- Discriminant fields for track and release variants.
-- A discriminant is the text that tells variants of the same base title apart
-- (e.g. "Deluxe Edition", "Radio Edit") so variants can be stored separately
-- yet still be grouped by base name.

-- Discriminant columns on plays, releases and recordings
ALTER TABLE plays ADD COLUMN track_discriminant TEXT;
ALTER TABLE plays ADD COLUMN release_discriminant TEXT;
ALTER TABLE releases ADD COLUMN discriminant TEXT;
ALTER TABLE recordings ADD COLUMN discriminant TEXT;

-- Single-column indexes for searching and filtering by discriminant
CREATE INDEX idx_plays_track_discriminant ON plays (track_discriminant);
CREATE INDEX idx_plays_release_discriminant ON plays (release_discriminant);
CREATE INDEX idx_releases_discriminant ON releases (discriminant);
CREATE INDEX idx_recordings_discriminant ON recordings (discriminant);

-- Composite indexes for grouping by base name + discriminant
CREATE INDEX idx_plays_track_name_discriminant ON plays (track_name, track_discriminant);
CREATE INDEX idx_plays_release_name_discriminant ON plays (release_name, release_discriminant);

-- Rebuild the release play count materialized view with the discriminant column.
-- Releases without plays are kept (LEFT JOIN) with a play_count of 0.
DROP MATERIALIZED VIEW IF EXISTS mv_release_play_counts;
CREATE MATERIALIZED VIEW mv_release_play_counts AS
SELECT
    r.mbid AS release_mbid,
    r.name AS release_name,
    r.discriminant AS release_discriminant,
    COUNT(p.uri) AS play_count
FROM
    releases r
    LEFT JOIN plays p ON p.release_mbid = r.mbid
GROUP BY
    r.mbid, r.name, r.discriminant;

CREATE UNIQUE INDEX idx_mv_release_play_counts_discriminant ON mv_release_play_counts (release_mbid);

-- Same rebuild for the recording play count materialized view.
DROP MATERIALIZED VIEW IF EXISTS mv_recording_play_counts;
CREATE MATERIALIZED VIEW mv_recording_play_counts AS
SELECT
    rec.mbid AS recording_mbid,
    rec.name AS recording_name,
    rec.discriminant AS recording_discriminant,
    COUNT(p.uri) AS play_count
FROM
    recordings rec
    LEFT JOIN plays p ON p.recording_mbid = rec.mbid
GROUP BY
    rec.mbid, rec.name, rec.discriminant;

CREATE UNIQUE INDEX idx_mv_recording_play_counts_discriminant ON mv_recording_play_counts (recording_mbid);

-- Per (track_name, track_discriminant): plays, distinct listeners (did) and
-- distinct recording MBIDs.
CREATE VIEW track_variants AS
SELECT
    track_name,
    track_discriminant,
    COUNT(*) AS play_count,
    COUNT(DISTINCT did) AS unique_listeners,
    COUNT(DISTINCT recording_mbid) AS unique_recordings
FROM plays
WHERE track_name IS NOT NULL
GROUP BY track_name, track_discriminant
ORDER BY track_name, play_count DESC;

-- Per (release_name, release_discriminant): plays, distinct listeners (did)
-- and distinct release MBIDs.
CREATE VIEW release_variants AS
SELECT
    release_name,
    release_discriminant,
    COUNT(*) AS play_count,
    COUNT(DISTINCT did) AS unique_listeners,
    COUNT(DISTINCT release_mbid) AS unique_releases
FROM plays
WHERE release_name IS NOT NULL
GROUP BY release_name, release_discriminant
ORDER BY release_name, play_count DESC;

-- extract_discriminant(name_text)
--   Returns the trimmed content of the first (...), [...] or {...} group in
--   name_text that contains one of the variant keywords, or NULL if no group
--   matches. Parentheses are tried first, then brackets, then braces.
--   Matching is case-sensitive here: substring() is called without a flag,
--   whereas get_base_name() below uses the 'i' flag.
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Keywords that mark a bracketed suffix as variant information
    variant_keywords CONSTANT TEXT :=
        'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus';
    -- Capture group 1 is the text between the delimiters
    discriminant_patterns TEXT[] := ARRAY[
        '\(([^)]*(?:' || variant_keywords || ').*?)\)',
        '\[([^]]*(?:' || variant_keywords || ').*?)\]',
        '\{([^}]*(?:' || variant_keywords || ').*?)\}'
    ];
    pattern TEXT;
    match_result TEXT;
BEGIN
    -- First pattern that yields a non-blank capture wins
    FOREACH pattern IN ARRAY discriminant_patterns
    LOOP
        SELECT substring(name_text FROM pattern) INTO match_result;
        IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
            RETURN trim(match_result);
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

-- get_base_name(name_text)
--   Returns name_text with every (...), [...] or {...} group that contains a
--   variant keyword removed (case-insensitively), and runs of whitespace
--   collapsed to a single space.
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Same keyword list as extract_discriminant()
    variant_keywords CONSTANT TEXT :=
        'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus';
    -- Each pattern also swallows the whitespace around the delimited group
    cleanup_patterns TEXT[] := ARRAY[
        '\s*\([^)]*(?:' || variant_keywords || ').*?\)\s*',
        '\s*\[[^]]*(?:' || variant_keywords || ').*?\]\s*',
        '\s*\{[^}]*(?:' || variant_keywords || ').*?\}\s*'
    ];
    pattern TEXT;
    result_text TEXT := name_text;
BEGIN
    -- Replace each variant group with a single space ('g' = all, 'i' = ignore case)
    FOREACH pattern IN ARRAY cleanup_patterns
    LOOP
        result_text := regexp_replace(result_text, pattern, ' ', 'gi');
    END LOOP;

    -- Clean up extra whitespace
    result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');

    RETURN result_text;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

-- Catalog comments explaining the discriminant system
COMMENT ON COLUMN plays.track_discriminant IS 'Distinguishing information for track variants (e.g., "Acoustic Version", "Live at Wembley", "Radio Edit")';
COMMENT ON COLUMN plays.release_discriminant IS 'Distinguishing information for release variants (e.g., "Deluxe Edition", "Remastered", "2023 Remaster")';
COMMENT ON COLUMN releases.discriminant IS 'Distinguishing information for release variants to enable proper grouping';
COMMENT ON COLUMN recordings.discriminant IS 'Distinguishing information for recording variants to enable proper grouping';

COMMENT ON VIEW track_variants IS 'Shows all variants of tracks with their play counts and unique listeners';
COMMENT ON VIEW release_variants IS 'Shows all variants of releases with their play counts and unique listeners';

COMMENT ON FUNCTION extract_discriminant IS 'Extracts discriminant information from track/release names for migration purposes';
COMMENT ON FUNCTION get_base_name IS 'Returns the base name without discriminant information for grouping purposes';

···

-276

services/migrations/20241220000007_enhanced_discriminant_extraction.sql

-- Enhanced discriminant extraction with comprehensive edition/version patterns.
-- Replaces extract_discriminant() and get_base_name(), adds
-- extract_edition_discriminant(), backfills the discriminant columns from
-- existing names and creates two analysis views.

-- Drop existing functions to replace them with enhanced versions
DROP FUNCTION IF EXISTS extract_discriminant(TEXT);
DROP FUNCTION IF EXISTS get_base_name(TEXT);

-- extract_discriminant(name_text)
--   Returns the first discriminant found in name_text, or NULL when the input
--   is NULL/blank or nothing matches. Patterns are tried in this order:
--   parentheses, brackets, braces (five content rules each), then a trailing
--   " - ..." suffix, then a trailing ": ..." suffix. The captured text is
--   trimmed and stripped of leading/trailing non-word characters.
--   Matching is case-sensitive (no flag is passed to substring()).
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Keyword alternations. The colon rule uses kw_core only, the dash rule
    -- uses kw_core + kw_media, the delimiter rules use all three lists.
    kw_core CONSTANT TEXT :=
        'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive';
    kw_media CONSTANT TEXT := 'digital|vinyl|cd|dvd|blu-ray';
    kw_extra CONSTANT TEXT :=
        'hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative';

    -- Content rules shared by the (...), [...] and {...} patterns
    year_tail  CONSTANT TEXT := '\s*(?:remaster|edition|version|mix|cut|release).*?';
    in_keyword CONSTANT TEXT := '(?:' || kw_core || '|' || kw_media || '|' || kw_extra || ').*?';
    in_year    CONSTANT TEXT := '(?:\d{4}|\d{2})' || year_tail;
    in_volume  CONSTANT TEXT := '(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?';
    in_feat    CONSTANT TEXT := '(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?';
    in_source  CONSTANT TEXT := '(?:from|soundtrack|ost|score|theme).*?';

    -- Order matters: the first pattern with a usable capture wins
    discriminant_patterns TEXT[] := ARRAY[
        -- Parentheses patterns
        '\(([^)]*' || in_keyword || ')\)',
        '\(([^)]*' || in_year    || ')\)',
        '\(([^)]*' || in_volume  || ')\)',
        '\(([^)]*' || in_feat    || ')\)',
        '\(([^)]*' || in_source  || ')\)',

        -- Brackets patterns
        '\[([^]]*' || in_keyword || ')\]',
        '\[([^]]*' || in_year    || ')\]',
        '\[([^]]*' || in_volume  || ')\]',
        '\[([^]]*' || in_feat    || ')\]',
        '\[([^]]*' || in_source  || ')\]',

        -- Braces patterns
        '\{([^}]*' || in_keyword || ')\}',
        '\{([^}]*' || in_year    || ')\}',
        '\{([^}]*' || in_volume  || ')\}',
        '\{([^}]*' || in_feat    || ')\}',
        '\{([^}]*' || in_source  || ')\}',

        -- Dash/hyphen patterns (common for editions)
        '[-–—]\s*([^-–—]*(?:' || kw_core || '|' || kw_media || ').*?)$',
        -- NOTE(review): the capture group here (and in the colon variant)
        -- covers only the year digits, so only the year is returned — confirm
        -- that is intended.
        '[-–—]\s*(\d{4}|\d{2})' || year_tail || '$',

        -- Colon patterns (common for subtitles and versions)
        ':\s*([^:]*(?:' || kw_core || ').*?)$',
        ':\s*(\d{4}|\d{2})' || year_tail || '$'
    ];

    pattern TEXT;
    match_result TEXT;
BEGIN
    -- Return early if input is null or empty
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN NULL;
    END IF;

    -- Try each pattern to find discriminant information
    FOREACH pattern IN ARRAY discriminant_patterns
    LOOP
        SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
        IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
            -- Clean up the match result
            match_result := trim(match_result);
            -- Remove leading/trailing punctuation
            match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
            -- Skip captures that are empty after cleanup and try the next pattern
            IF length(trim(match_result)) > 0 THEN
                RETURN match_result;
            END IF;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

-- get_base_name(name_text)
--   Returns name_text with all discriminant text removed (case-insensitively),
--   whitespace collapsed and a trailing , ; : or dash stripped.
--   NULL/blank input is returned unchanged; if the cleanup would leave an
--   empty string, the original name_text is returned instead.
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Same keyword lists and content rules as extract_discriminant()
    kw_core CONSTANT TEXT :=
        'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive';
    kw_media CONSTANT TEXT := 'digital|vinyl|cd|dvd|blu-ray';
    kw_extra CONSTANT TEXT :=
        'hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative';

    in_keyword CONSTANT TEXT := '(?:' || kw_core || '|' || kw_media || '|' || kw_extra || ').*?';
    in_year    CONSTANT TEXT := '(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?';
    in_volume  CONSTANT TEXT := '(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?';
    in_feat    CONSTANT TEXT := '(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?';
    in_source  CONSTANT TEXT := '(?:from|soundtrack|ost|score|theme).*?';

    -- Cleanup patterns mirroring the extraction patterns
    cleanup_patterns TEXT[] := ARRAY[
        -- Remove parentheses content
        '\s*\([^)]*' || in_keyword || '\)\s*',
        '\s*\([^)]*' || in_year    || '\)\s*',
        '\s*\([^)]*' || in_volume  || '\)\s*',
        '\s*\([^)]*' || in_feat    || '\)\s*',
        '\s*\([^)]*' || in_source  || '\)\s*',

        -- Remove brackets content
        '\s*\[[^]]*' || in_keyword || '\]\s*',
        '\s*\[[^]]*' || in_year    || '\]\s*',
        '\s*\[[^]]*' || in_volume  || '\]\s*',
        '\s*\[[^]]*' || in_feat    || '\]\s*',
        '\s*\[[^]]*' || in_source  || '\]\s*',

        -- Remove braces content
        '\s*\{[^}]*' || in_keyword || '\}\s*',
        '\s*\{[^}]*' || in_year    || '\}\s*',
        '\s*\{[^}]*' || in_volume  || '\}\s*',
        '\s*\{[^}]*' || in_feat    || '\}\s*',
        '\s*\{[^}]*' || in_source  || '\}\s*',

        -- Remove dash/hyphen patterns
        '\s*[-–—]\s*[^-–—]*(?:' || kw_core || '|' || kw_media || ').*?$',
        '\s*[-–—]\s*' || in_year || '$',

        -- Remove colon patterns
        '\s*:\s*[^:]*(?:' || kw_core || ').*?$',
        '\s*:\s*' || in_year || '$'
    ];

    pattern TEXT;
    result_text TEXT := name_text;
BEGIN
    -- Return early if input is null or empty
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN name_text;
    END IF;

    -- Remove discriminant patterns to get base name
    FOREACH pattern IN ARRAY cleanup_patterns
    LOOP
        result_text := regexp_replace(result_text, pattern, ' ', 'gi');
    END LOOP;

    -- Clean up extra whitespace and normalize
    result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');

    -- Remove trailing punctuation that might be left after removal
    result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
    result_text := trim(result_text);

    -- Ensure we don't return an empty string
    IF length(result_text) = 0 THEN
        RETURN name_text;
    END IF;

    RETURN result_text;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

-- extract_edition_discriminant(name_text)
--   Narrower variant of extract_discriminant(): only looks for text containing
--   "edition", "version", "remaster", or a 2/4-digit year followed by a release
--   word. Returns the cleaned-up first match, or NULL when the input is
--   NULL/blank or nothing matches. Matching is case-sensitive.
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Focused patterns for edition/version extraction; first match wins
    edition_patterns TEXT[] := ARRAY[
        -- Edition patterns
        '\(([^)]*edition[^)]*)\)',
        '\[([^]]*edition[^]]*)\]',
        '\{([^}]*edition[^}]*)\}',
        '[-–—]\s*([^-–—]*edition[^-–—]*)$',
        ':\s*([^:]*edition[^:]*)$',

        -- Version patterns
        '\(([^)]*version[^)]*)\)',
        '\[([^]]*version[^]]*)\]',
        '\{([^}]*version[^}]*)\}',
        '[-–—]\s*([^-–—]*version[^-–—]*)$',
        ':\s*([^:]*version[^:]*)$',

        -- Remaster patterns
        '\(([^)]*remaster[^)]*)\)',
        '\[([^]]*remaster[^]]*)\]',
        '\{([^}]*remaster[^}]*)\}',
        '[-–—]\s*([^-–—]*remaster[^-–—]*)$',
        ':\s*([^:]*remaster[^:]*)$',

        -- Year-based patterns
        '\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
        '\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
        '\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
    ];

    pattern TEXT;
    match_result TEXT;
BEGIN
    -- Return early if input is null or empty
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN NULL;
    END IF;

    FOREACH pattern IN ARRAY edition_patterns
    LOOP
        SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result;
        IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
            match_result := trim(match_result);
            -- Strip leading/trailing non-word characters
            match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g');
            IF length(trim(match_result)) > 0 THEN
                RETURN match_result;
            END IF;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

-- Backfill: populate discriminants from existing names. Rows that already
-- have a discriminant, or whose name yields none, are left untouched.
UPDATE recordings
SET discriminant = extract_discriminant(name)
WHERE discriminant IS NULL
  AND extract_discriminant(name) IS NOT NULL;

UPDATE releases
SET discriminant = extract_discriminant(name)
WHERE discriminant IS NULL
  AND extract_discriminant(name) IS NOT NULL;

UPDATE plays
SET track_discriminant = extract_discriminant(track_name)
WHERE track_discriminant IS NULL
  AND extract_discriminant(track_name) IS NOT NULL;

UPDATE plays
SET release_discriminant = extract_discriminant(release_name)
WHERE release_discriminant IS NULL
  AND release_name IS NOT NULL
  AND extract_discriminant(release_name) IS NOT NULL;

-- Create indexes for efficient discriminant queries
CREATE INDEX IF NOT EXISTS idx_recordings_name_discriminant ON recordings (name, discriminant);
CREATE INDEX IF NOT EXISTS idx_releases_name_discriminant ON releases (name, discriminant);

-- Catalog comments for the functions
COMMENT ON FUNCTION extract_discriminant IS 'Enhanced discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
COMMENT ON FUNCTION get_base_name IS 'Enhanced base name extraction removing comprehensive discriminant patterns to enable proper grouping';
COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized function for extracting edition and version discriminants with focused patterns';

-- Side-by-side view of stored vs. freshly extracted discriminants for every
-- named recording and release, for quality assessment and debugging.
CREATE OR REPLACE VIEW discriminant_analysis AS
SELECT
    'recordings' as table_name,
    name as original_name,
    discriminant,
    get_base_name(name) as base_name,
    extract_discriminant(name) as extracted_discriminant,
    extract_edition_discriminant(name) as edition_discriminant
FROM recordings
WHERE name IS NOT NULL
UNION ALL
SELECT
    'releases' as table_name,
    name as original_name,
    discriminant,
    get_base_name(name) as base_name,
    extract_discriminant(name) as extracted_discriminant,
    extract_edition_discriminant(name) as edition_discriminant
FROM releases
WHERE name IS NOT NULL;

COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing discriminant extraction results for quality assessment and debugging';

-- Refresh materialized views to include discriminant information
REFRESH MATERIALIZED VIEW mv_release_play_counts;
REFRESH MATERIALIZED VIEW mv_recording_play_counts;

-- Summary statistics for discriminant usage, one row per entity type.
-- discriminant_percentage is NULL (not a division-by-zero error) when the
-- table is empty.
CREATE OR REPLACE VIEW discriminant_stats AS
SELECT
    'recordings' as entity_type,
    COUNT(*) as total_count,
    COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
    COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
    ROUND(
        COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0), 2
    ) as discriminant_percentage
FROM recordings
UNION ALL
SELECT
    'releases' as entity_type,
    COUNT(*) as total_count,
    COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant,
    COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant,
    ROUND(
        COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0), 2
    ) as discriminant_percentage
FROM releases;

COMMENT ON VIEW discriminant_stats IS 'Statistics showing discriminant usage and extraction potential across entity types';

···

-252

services/migrations/20241220000008_fix_discriminant_case_sensitivity.sql

··· 1 - -- Fix case sensitivity in discriminant extraction patterns 2 - -- This migration updates the discriminant extraction functions to properly handle case-insensitive matching 3 - 4 - -- Drop dependent views first, then functions, then recreate everything 5 - DROP VIEW IF EXISTS discriminant_analysis CASCADE; 6 - DROP VIEW IF EXISTS discriminant_stats CASCADE; 7 - 8 - -- Drop existing functions to replace with case-insensitive versions 9 - DROP FUNCTION IF EXISTS extract_discriminant(TEXT) CASCADE; 10 - DROP FUNCTION IF EXISTS get_base_name(TEXT) CASCADE; 11 - DROP FUNCTION IF EXISTS extract_edition_discriminant(TEXT) CASCADE; 12 - 13 - -- Enhanced function to extract discriminants with case-insensitive matching 14 - CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$ 15 - DECLARE 16 - -- Comprehensive patterns for discriminant extraction with case-insensitive flags 17 - discriminant_patterns TEXT[] := ARRAY[ 18 - -- Parentheses patterns 19 - '(?i)$([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)$', 20 - '(?i)$([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)$', 21 - '(?i)$([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)$', 22 - '(?i)$([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)$', 23 - '(?i)$([^)]*(?:from|soundtrack|ost|score|theme).*?)$', 24 - 25 - -- Brackets patterns 26 - 
'(?i)\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]', 27 - '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]', 28 - '(?i)\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]', 29 - '(?i)\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]', 30 - '(?i)\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]', 31 - 32 - -- Braces patterns 33 - '(?i)\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}', 34 - '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}', 35 - '(?i)\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}', 36 - '(?i)\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}', 37 - '(?i)\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}', 38 - 39 - -- Dash/hyphen patterns (common for editions) 40 - 
'(?i)[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$', 41 - '(?i)[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 42 - 43 - -- Colon patterns (common for subtitles and versions) 44 - '(?i):\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$', 45 - '(?i):\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 46 - ]; 47 - 48 - pattern TEXT; 49 - match_result TEXT; 50 - BEGIN 51 - -- Return early if input is null or empty 52 - IF name_text IS NULL OR trim(name_text) = '' THEN 53 - RETURN NULL; 54 - END IF; 55 - 56 - -- Try each pattern to find discriminant information 57 - FOREACH pattern IN ARRAY discriminant_patterns 58 - LOOP 59 - SELECT substring(name_text FROM pattern) INTO match_result; 60 - IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 61 - -- Clean up the match result 62 - match_result := trim(match_result); 63 - -- Remove leading/trailing punctuation 64 - match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 65 - -- Ensure it's not just whitespace or empty after cleanup 66 - IF length(trim(match_result)) > 0 THEN 67 - RETURN match_result; 68 - END IF; 69 - END IF; 70 - END LOOP; 71 - 72 - RETURN NULL; 73 - END; 74 - $$ LANGUAGE plpgsql IMMUTABLE; 75 - 76 - -- Enhanced function to get base name without 
discriminant with case-insensitive matching 77 - CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$ 78 - DECLARE 79 - -- Comprehensive cleanup patterns matching the extraction patterns 80 - cleanup_patterns TEXT[] := ARRAY[ 81 - -- Remove parentheses content 82 - '(?i)\s*$[^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?$\s*', 83 - '(?i)\s*$[^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$\s*', 84 - '(?i)\s*$[^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?$\s*', 85 - '(?i)\s*$[^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?$\s*', 86 - '(?i)\s*$[^)]*(?:from|soundtrack|ost|score|theme).*?$\s*', 87 - 88 - -- Remove brackets content 89 - '(?i)\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*', 90 - '(?i)\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*', 91 - '(?i)\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*', 92 - 
'(?i)\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*', 93 - '(?i)\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*', 94 - 95 - -- Remove braces content 96 - '(?i)\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*', 97 - '(?i)\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*', 98 - '(?i)\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*', 99 - '(?i)\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*', 100 - '(?i)\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*', 101 - 102 - -- Remove dash/hyphen patterns 103 - '(?i)\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$', 104 - '(?i)\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 105 - 106 - -- Remove colon patterns 107 - 
'(?i)\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$', 108 - '(?i)\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 109 - ]; 110 - 111 - pattern TEXT; 112 - result_text TEXT := name_text; 113 - BEGIN 114 - -- Return early if input is null or empty 115 - IF name_text IS NULL OR trim(name_text) = '' THEN 116 - RETURN name_text; 117 - END IF; 118 - 119 - -- Remove discriminant patterns to get base name 120 - FOREACH pattern IN ARRAY cleanup_patterns 121 - LOOP 122 - result_text := regexp_replace(result_text, pattern, ' ', 'g'); 123 - END LOOP; 124 - 125 - -- Clean up extra whitespace and normalize 126 - result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g'); 127 - 128 - -- Remove trailing punctuation that might be left after removal 129 - result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g'); 130 - result_text := trim(result_text); 131 - 132 - -- Ensure we don't return an empty string 133 - IF length(result_text) = 0 THEN 134 - RETURN name_text; 135 - END IF; 136 - 137 - RETURN result_text; 138 - END; 139 - $$ LANGUAGE plpgsql IMMUTABLE; 140 - 141 - -- Enhanced function to extract discriminant specifically for editions and versions with case-insensitive matching 142 - CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$ 143 - DECLARE 144 - -- Focused patterns for edition/version extraction with case-insensitive flags 145 - edition_patterns TEXT[] := ARRAY[ 146 - -- Edition patterns 147 - '(?i)$([^)]*edition[^)]*)$', 148 - '(?i)\[([^]]*edition[^]]*)\]', 149 - '(?i)\{([^}]*edition[^}]*)\}', 150 - '(?i)[-–—]\s*([^-–—]*edition[^-–—]*)$', 151 - 
'(?i):\s*([^:]*edition[^:]*)$', 152 - 153 - -- Version patterns 154 - '(?i)$([^)]*version[^)]*)$', 155 - '(?i)\[([^]]*version[^]]*)\]', 156 - '(?i)\{([^}]*version[^}]*)\}', 157 - '(?i)[-–—]\s*([^-–—]*version[^-–—]*)$', 158 - '(?i):\s*([^:]*version[^:]*)$', 159 - 160 - -- Remaster patterns 161 - '(?i)$([^)]*remaster[^)]*)$', 162 - '(?i)\[([^]]*remaster[^]]*)\]', 163 - '(?i)\{([^}]*remaster[^}]*)\}', 164 - '(?i)[-–—]\s*([^-–—]*remaster[^-–—]*)$', 165 - '(?i):\s*([^:]*remaster[^:]*)$', 166 - 167 - -- Year-based patterns 168 - '(?i)$([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)$', 169 - '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]', 170 - '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}' 171 - ]; 172 - 173 - pattern TEXT; 174 - match_result TEXT; 175 - BEGIN 176 - -- Return early if input is null or empty 177 - IF name_text IS NULL OR trim(name_text) = '' THEN 178 - RETURN NULL; 179 - END IF; 180 - 181 - -- Try edition-specific patterns first 182 - FOREACH pattern IN ARRAY edition_patterns 183 - LOOP 184 - SELECT substring(name_text FROM pattern) INTO match_result; 185 - IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 186 - match_result := trim(match_result); 187 - match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 188 - IF length(trim(match_result)) > 0 THEN 189 - RETURN match_result; 190 - END IF; 191 - END IF; 192 - END LOOP; 193 - 194 - RETURN NULL; 195 - END; 196 - $$ LANGUAGE plpgsql IMMUTABLE; 197 - 198 - -- Update existing records with newly extracted discriminants (case-insensitive) 199 - UPDATE recordings 200 - SET discriminant = extract_discriminant(name) 201 - WHERE discriminant IS NULL 202 - AND extract_discriminant(name) IS NOT NULL; 203 - 204 - UPDATE releases 205 - SET discriminant = extract_discriminant(name) 206 - WHERE discriminant IS NULL 207 - AND extract_discriminant(name) IS NOT NULL; 208 - 209 - UPDATE 
plays 210 - SET track_discriminant = extract_discriminant(track_name) 211 - WHERE track_discriminant IS NULL 212 - AND extract_discriminant(track_name) IS NOT NULL; 213 - 214 - UPDATE plays 215 - SET release_discriminant = extract_discriminant(release_name) 216 - WHERE release_discriminant IS NULL 217 - AND release_name IS NOT NULL 218 - AND extract_discriminant(release_name) IS NOT NULL; 219 - 220 - -- Update comments for the enhanced functions 221 - COMMENT ON FUNCTION extract_discriminant IS 'Enhanced case-insensitive discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons'; 222 - COMMENT ON FUNCTION get_base_name IS 'Enhanced case-insensitive base name extraction removing comprehensive discriminant patterns to enable proper grouping'; 223 - COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized case-insensitive function for extracting edition and version discriminants with focused patterns'; 224 - 225 - -- Refresh materialized views to reflect the case-insensitive improvements 226 - REFRESH MATERIALIZED VIEW mv_release_play_counts; 227 - REFRESH MATERIALIZED VIEW mv_recording_play_counts; 228 - 229 - -- Update discriminant analysis view to include case-insensitive results 230 - DROP VIEW IF EXISTS discriminant_analysis; 231 - CREATE OR REPLACE VIEW discriminant_analysis AS 232 - SELECT 233 - 'recordings' as table_name, 234 - name as original_name, 235 - discriminant, 236 - get_base_name(name) as base_name, 237 - extract_discriminant(name) as extracted_discriminant, 238 - extract_edition_discriminant(name) as edition_discriminant 239 - FROM recordings 240 - WHERE name IS NOT NULL 241 - UNION ALL 242 - SELECT 243 - 'releases' as table_name, 244 - name as original_name, 245 - discriminant, 246 - get_base_name(name) as base_name, 247 - extract_discriminant(name) as extracted_discriminant, 248 - extract_edition_discriminant(name) as edition_discriminant 249 - FROM releases 250 - 
WHERE name IS NOT NULL; 251 - 252 - COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing case-insensitive discriminant extraction results for quality assessment and debugging';

···