···11--- Initial comprehensive schema for Teal music platform
22--- Based on services/cadet/sql/base.sql
-- Artists, keyed by their MBID; play_count starts at 0.
CREATE TABLE artists (
    mbid       uuid    PRIMARY KEY,
    name       text    NOT NULL,
    play_count integer DEFAULT 0
);
-- Releases are synonymous with 'albums'.
CREATE TABLE releases (
    mbid       uuid    PRIMARY KEY,
    name       text    NOT NULL,
    play_count integer DEFAULT 0
);
-- Recordings are synonymous with 'tracks', BUT a track can appear on multiple releases!
CREATE TABLE recordings (
    mbid       uuid    PRIMARY KEY,
    name       text    NOT NULL,
    play_count integer DEFAULT 0
);
-- Play records, keyed by URI. Only track_name is mandatory among the music
-- metadata; release_mbid / recording_mbid optionally link a play to rows in
-- releases / recordings.
CREATE TABLE plays (
    uri                       text        PRIMARY KEY,
    did                       text        NOT NULL,
    rkey                      text        NOT NULL,
    cid                       text        NOT NULL,
    isrc                      text,
    duration                  integer,
    track_name                text        NOT NULL,
    played_time               timestamptz,
    processed_time            timestamptz DEFAULT now(),  -- set when the row is inserted
    release_mbid              uuid        REFERENCES releases (mbid),
    release_name              text,
    recording_mbid            uuid        REFERENCES recordings (mbid),
    submission_client_agent   text,
    music_service_base_domain text,
    origin_url                text
);

-- Lookup indexes: by linked release / recording, by play time, and by user (did).
CREATE INDEX idx_plays_release_mbid   ON plays USING btree (release_mbid);
CREATE INDEX idx_plays_recording_mbid ON plays USING btree (recording_mbid);
CREATE INDEX idx_plays_played_time    ON plays USING btree (played_time);
CREATE INDEX idx_plays_did            ON plays USING btree (did);
-- Junction table: which artists are credited on which play.
CREATE TABLE play_to_artists (
    play_uri    text REFERENCES plays (uri),
    artist_mbid uuid REFERENCES artists (mbid),
    artist_name text,  -- denormalized copy, kept here for ease of use when joining
    PRIMARY KEY (play_uri, artist_mbid)
);

-- The primary key leads with play_uri; this index serves lookups by artist.
CREATE INDEX idx_play_to_artists_artist ON play_to_artists USING btree (artist_mbid);
-- User profiles, one row per DID.
CREATE TABLE profiles (
    did                text PRIMARY KEY,
    handle             text,
    display_name       text,
    description        text,
    description_facets jsonb,
    avatar             text,  -- IPLD reference of the image (bafy...)
    banner             text,
    created_at         timestamptz
);
-- Featured item per user: the primary key on did allows exactly one row per DID.
CREATE TABLE featured_items (
    did  text PRIMARY KEY,
    mbid text NOT NULL,
    type text NOT NULL
);
-- Status records ("statii"), stored as raw JSONB and keyed by URI.
CREATE TABLE statii (
    uri        text        PRIMARY KEY,
    did        text        NOT NULL,
    rkey       text        NOT NULL,
    cid        text        NOT NULL,
    record     jsonb       NOT NULL,
    indexed_at timestamptz DEFAULT now()  -- set when the row is inserted
);

CREATE INDEX idx_statii_did_rkey ON statii USING btree (did, rkey);
-- Play totals per artist. Outer joins keep artists with no plays (count 0).
CREATE MATERIALIZED VIEW mv_artist_play_counts (artist_mbid, artist_name, play_count) AS
SELECT
    art.mbid,
    art.name,
    COUNT(pl.uri)
FROM artists AS art
LEFT OUTER JOIN play_to_artists AS link ON link.artist_mbid = art.mbid
LEFT OUTER JOIN plays AS pl ON pl.uri = link.play_uri
GROUP BY art.mbid, art.name;

CREATE UNIQUE INDEX idx_mv_artist_play_counts
    ON mv_artist_play_counts USING btree (artist_mbid);
-- Play totals per release. The outer join keeps releases with no plays (count 0).
CREATE MATERIALIZED VIEW mv_release_play_counts (release_mbid, release_name, play_count) AS
SELECT
    rel.mbid,
    rel.name,
    COUNT(pl.uri)
FROM releases AS rel
LEFT OUTER JOIN plays AS pl ON pl.release_mbid = rel.mbid
GROUP BY rel.mbid, rel.name;

CREATE UNIQUE INDEX idx_mv_release_play_counts
    ON mv_release_play_counts USING btree (release_mbid);
-- Play totals per recording. The outer join keeps recordings with no plays (count 0).
CREATE MATERIALIZED VIEW mv_recording_play_counts (recording_mbid, recording_name, play_count) AS
SELECT
    trk.mbid,
    trk.name,
    COUNT(pl.uri)
FROM recordings AS trk
LEFT OUTER JOIN plays AS pl ON pl.recording_mbid = trk.mbid
GROUP BY trk.mbid, trk.name;

CREATE UNIQUE INDEX idx_mv_recording_play_counts
    ON mv_recording_play_counts USING btree (recording_mbid);
-- Platform-wide totals; the aggregate has no GROUP BY, so the view holds one row.
-- uri is the primary key of plays, so counting rows equals counting uris.
CREATE MATERIALIZED VIEW mv_global_play_count (total_plays, unique_listeners) AS
SELECT
    COUNT(*),
    COUNT(DISTINCT pl.did)
FROM plays AS pl;

CREATE UNIQUE INDEX idx_mv_global_play_count
    ON mv_global_play_count USING btree (total_plays);
-- Artists ranked by plays whose played_time is within 30 days of the refresh.
-- Inner joins: artists with no qualifying plays are omitted.
CREATE MATERIALIZED VIEW mv_top_artists_30days (artist_mbid, artist_name, play_count) AS
SELECT
    art.mbid,
    art.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN play_to_artists AS link ON link.play_uri = pl.uri
INNER JOIN artists AS art ON art.mbid = link.artist_mbid
WHERE pl.played_time >= NOW() - INTERVAL '30 days'
GROUP BY art.mbid, art.name
ORDER BY COUNT(pl.uri) DESC;
-- Releases ranked by plays whose played_time is within 30 days of the refresh.
-- Inner join: releases with no qualifying plays are omitted.
CREATE MATERIALIZED VIEW mv_top_releases_30days (release_mbid, release_name, play_count) AS
SELECT
    rel.mbid,
    rel.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN releases AS rel ON rel.mbid = pl.release_mbid
WHERE pl.played_time >= NOW() - INTERVAL '30 days'
GROUP BY rel.mbid, rel.name
ORDER BY COUNT(pl.uri) DESC;
-- Per-user artist ranking over plays from the 30 days before the refresh.
-- Only plays whose did has a row in profiles are counted (inner join).
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days (did, artist_mbid, artist_name, play_count) AS
SELECT
    usr.did,
    art.mbid,
    art.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN profiles AS usr ON usr.did = pl.did
INNER JOIN play_to_artists AS link ON link.play_uri = pl.uri
INNER JOIN artists AS art ON art.mbid = link.artist_mbid
WHERE pl.played_time >= NOW() - INTERVAL '30 days'
GROUP BY usr.did, art.mbid, art.name
ORDER BY COUNT(pl.uri) DESC;
-- Per-user artist ranking over plays from the 7 days before the refresh.
-- Only plays whose did has a row in profiles are counted (inner join).
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days (did, artist_mbid, artist_name, play_count) AS
SELECT
    usr.did,
    art.mbid,
    art.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN profiles AS usr ON usr.did = pl.did
INNER JOIN play_to_artists AS link ON link.play_uri = pl.uri
INNER JOIN artists AS art ON art.mbid = link.artist_mbid
WHERE pl.played_time >= NOW() - INTERVAL '7 days'
GROUP BY usr.did, art.mbid, art.name
ORDER BY COUNT(pl.uri) DESC;
-- Per-user release ranking over plays from the 30 days before the refresh.
-- Only plays whose did has a row in profiles are counted (inner join).
CREATE MATERIALIZED VIEW mv_top_releases_for_user_30days (did, release_mbid, release_name, play_count) AS
SELECT
    usr.did,
    rel.mbid,
    rel.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN profiles AS usr ON usr.did = pl.did
INNER JOIN releases AS rel ON rel.mbid = pl.release_mbid
WHERE pl.played_time >= NOW() - INTERVAL '30 days'
GROUP BY usr.did, rel.mbid, rel.name
ORDER BY COUNT(pl.uri) DESC;
-- Per-user release ranking over plays from the 7 days before the refresh.
-- Only plays whose did has a row in profiles are counted (inner join).
CREATE MATERIALIZED VIEW mv_top_releases_for_user_7days (did, release_mbid, release_name, play_count) AS
SELECT
    usr.did,
    rel.mbid,
    rel.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN profiles AS usr ON usr.did = pl.did
INNER JOIN releases AS rel ON rel.mbid = pl.release_mbid
WHERE pl.played_time >= NOW() - INTERVAL '7 days'
GROUP BY usr.did, rel.mbid, rel.name
ORDER BY COUNT(pl.uri) DESC;
···11--- CAR import functionality tables
22--- For handling AT Protocol CAR file imports and processing
-- Tracks uploaded CAR files that are queued for processing.
-- The table and its indexes are all created with IF NOT EXISTS so the
-- migration can be re-run: with plain CREATE INDEX a second run would skip
-- the table and then fail on the already-existing indexes.
CREATE TABLE IF NOT EXISTS car_import_requests (
    import_id TEXT PRIMARY KEY,
    car_data_base64 TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, completed, failed
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    processed_at TIMESTAMP WITH TIME ZONE,
    error_message TEXT,
    file_size_bytes INTEGER,
    block_count INTEGER,
    extracted_records_count INTEGER DEFAULT 0
);

CREATE INDEX IF NOT EXISTS idx_car_import_requests_status ON car_import_requests (status);
CREATE INDEX IF NOT EXISTS idx_car_import_requests_created_at ON car_import_requests (created_at);
-- Tracks raw IPLD blocks extracted from CAR files.
-- Indexes use IF NOT EXISTS to match the table's CREATE TABLE IF NOT EXISTS;
-- otherwise a re-run would skip the table and then fail on the indexes.
CREATE TABLE IF NOT EXISTS car_blocks (
    cid TEXT PRIMARY KEY,
    import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
    block_data BYTEA NOT NULL,
    decoded_successfully BOOLEAN DEFAULT FALSE,
    collection_type TEXT, -- e.g., 'fm.teal.alpha.feed.play', 'commit', etc.
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_car_blocks_import_id ON car_blocks (import_id);
CREATE INDEX IF NOT EXISTS idx_car_blocks_collection_type ON car_blocks (collection_type);
-- Tracks records extracted from CAR imports that were successfully processed.
-- Indexes use IF NOT EXISTS to match the table's CREATE TABLE IF NOT EXISTS;
-- otherwise a re-run would skip the table and then fail on the indexes.
CREATE TABLE IF NOT EXISTS car_extracted_records (
    id SERIAL PRIMARY KEY,
    import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
    cid TEXT NOT NULL REFERENCES car_blocks(cid),
    collection_type TEXT NOT NULL,
    record_uri TEXT, -- AT URI if applicable (e.g., for play records)
    synthetic_did TEXT, -- DID assigned for CAR imports (e.g., 'car-import:123')
    rkey TEXT,
    extracted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    processing_notes TEXT
);

CREATE INDEX IF NOT EXISTS idx_car_extracted_records_import_id ON car_extracted_records (import_id);
CREATE INDEX IF NOT EXISTS idx_car_extracted_records_collection_type ON car_extracted_records (collection_type);
CREATE INDEX IF NOT EXISTS idx_car_extracted_records_record_uri ON car_extracted_records (record_uri);
-- Tracks import metadata and commit information as key/value pairs per import.
-- The index uses IF NOT EXISTS to match the table's CREATE TABLE IF NOT EXISTS;
-- otherwise a re-run would skip the table and then fail on the index.
CREATE TABLE IF NOT EXISTS car_import_metadata (
    import_id TEXT NOT NULL REFERENCES car_import_requests(import_id),
    metadata_key TEXT NOT NULL,
    metadata_value JSONB NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    PRIMARY KEY (import_id, metadata_key)
);

-- The primary key leads with import_id; this index serves lookups by key alone.
CREATE INDEX IF NOT EXISTS idx_car_import_metadata_key ON car_import_metadata (metadata_key);
···11--- Migration to support artists without MusicBrainz IDs
22--- This allows the system to comply with the Teal lexicon where only trackName is required
-- Raw artist names for play records that arrive without MBIDs.
ALTER TABLE plays
    ADD artist_names_raw jsonb;
-- Artists table with a surrogate key, so an artist row can exist without an MBID.
CREATE TABLE artists_extended (
    id              serial      PRIMARY KEY,
    mbid            uuid        UNIQUE,  -- optional MusicBrainz ID
    name            text        NOT NULL,
    name_normalized text        GENERATED ALWAYS AS (lower(trim(name))) STORED,
    play_count      integer     DEFAULT 0,
    created_at      timestamptz DEFAULT now(),
    updated_at      timestamptz DEFAULT now()
);

-- Lookup indexes: by MBID (only rows that have one) and by normalized name.
CREATE INDEX idx_artists_extended_mbid
    ON artists_extended USING btree (mbid)
    WHERE mbid IS NOT NULL;
CREATE INDEX idx_artists_extended_name_normalized
    ON artists_extended USING btree (name_normalized);
-- Among artists without an MBID, the normalized name must be unique.
CREATE UNIQUE INDEX idx_artists_extended_name_unique
    ON artists_extended USING btree (name_normalized)
    WHERE mbid IS NULL;
-- Junction table linking plays to artists_extended rows (with or without MBID).
CREATE TABLE play_to_artists_extended (
    play_uri    text    NOT NULL,
    artist_id   integer NOT NULL,
    artist_name text    NOT NULL,  -- denormalized for performance
    PRIMARY KEY (play_uri, artist_id),
    FOREIGN KEY (play_uri) REFERENCES plays (uri),
    FOREIGN KEY (artist_id) REFERENCES artists_extended (id)
);

-- The primary key leads with play_uri; this index serves lookups by artist.
CREATE INDEX idx_play_to_artists_extended_artist
    ON play_to_artists_extended USING btree (artist_id);
-- Backfill: copy every legacy artist into artists_extended ...
INSERT INTO artists_extended (mbid, name, play_count)
SELECT
    legacy.mbid,
    legacy.name,
    legacy.play_count
FROM artists AS legacy;

-- ... then re-point each legacy play/artist link at the new surrogate id,
-- matching old and new artist rows on MBID.
INSERT INTO play_to_artists_extended (play_uri, artist_id, artist_name)
SELECT
    old_link.play_uri,
    ext.id,
    old_link.artist_name
FROM play_to_artists AS old_link
INNER JOIN artists_extended AS ext ON ext.mbid = old_link.artist_mbid;
-- Rebuild the artist play-count view on top of artists_extended.
-- Outer joins keep artists with no plays (count 0).
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
CREATE MATERIALIZED VIEW mv_artist_play_counts (artist_id, artist_mbid, artist_name, play_count) AS
SELECT
    ext.id,
    ext.mbid,
    ext.name,
    COUNT(pl.uri)
FROM artists_extended AS ext
LEFT OUTER JOIN play_to_artists_extended AS link ON link.artist_id = ext.id
LEFT OUTER JOIN plays AS pl ON pl.uri = link.play_uri
GROUP BY ext.id, ext.mbid, ext.name;

CREATE UNIQUE INDEX idx_mv_artist_play_counts_new
    ON mv_artist_play_counts USING btree (artist_id);
-- Rebuild the 30-day top-artists view on top of artists_extended.
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_30days;
CREATE MATERIALIZED VIEW mv_top_artists_30days (artist_id, artist_mbid, artist_name, play_count) AS
SELECT
    ext.id,
    ext.mbid,
    ext.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN play_to_artists_extended AS link ON link.play_uri = pl.uri
INNER JOIN artists_extended AS ext ON ext.id = link.artist_id
WHERE pl.played_time >= NOW() - INTERVAL '30 days'
GROUP BY ext.id, ext.mbid, ext.name
ORDER BY COUNT(pl.uri) DESC;
-- Rebuild the per-user 30-day top-artists view on top of artists_extended.
-- Only plays whose did has a row in profiles are counted (inner join).
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_30days;
CREATE MATERIALIZED VIEW mv_top_artists_for_user_30days (did, artist_id, artist_mbid, artist_name, play_count) AS
SELECT
    usr.did,
    ext.id,
    ext.mbid,
    ext.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN profiles AS usr ON usr.did = pl.did
INNER JOIN play_to_artists_extended AS link ON link.play_uri = pl.uri
INNER JOIN artists_extended AS ext ON ext.id = link.artist_id
WHERE pl.played_time >= NOW() - INTERVAL '30 days'
GROUP BY usr.did, ext.id, ext.mbid, ext.name
ORDER BY COUNT(pl.uri) DESC;
-- Rebuild the per-user 7-day top-artists view on top of artists_extended.
-- Only plays whose did has a row in profiles are counted (inner join).
DROP MATERIALIZED VIEW IF EXISTS mv_top_artists_for_user_7days;
CREATE MATERIALIZED VIEW mv_top_artists_for_user_7days (did, artist_id, artist_mbid, artist_name, play_count) AS
SELECT
    usr.did,
    ext.id,
    ext.mbid,
    ext.name,
    COUNT(pl.uri)
FROM plays AS pl
INNER JOIN profiles AS usr ON usr.did = pl.did
INNER JOIN play_to_artists_extended AS link ON link.play_uri = pl.uri
INNER JOIN artists_extended AS ext ON ext.id = link.artist_id
WHERE pl.played_time >= NOW() - INTERVAL '7 days'
GROUP BY usr.did, ext.id, ext.mbid, ext.name
ORDER BY COUNT(pl.uri) DESC;
-- Catalog comments describing the objects introduced by this migration.
COMMENT ON TABLE artists_extended
    IS 'Extended artists table that supports both MusicBrainz and non-MusicBrainz artists. Uses serial ID as primary key with optional MBID.';

COMMENT ON TABLE play_to_artists_extended
    IS 'Junction table linking plays to artists using the new artists_extended table structure.';

COMMENT ON COLUMN plays.artist_names_raw
    IS 'Raw artist names as JSON array for plays without MusicBrainz data, used as fallback when artist relationships cannot be established.';
···11--- Migration to support synthetic MBIDs for artists without MusicBrainz data
22--- This ensures all artists have some form of ID while maintaining uniqueness
-- uuid-ossp provides uuid_generate_v5, used for synthetic MBIDs.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Track where each artist's MBID came from; only the three listed values are allowed.
-- Existing rows receive the default 'unknown'.
ALTER TABLE artists_extended
    ADD COLUMN mbid_type text NOT NULL DEFAULT 'unknown',
    ADD CONSTRAINT chk_mbid_type
        CHECK (mbid_type IN ('musicbrainz', 'synthetic', 'unknown'));

-- Rows that already carry an MBID are classified as MusicBrainz artists.
UPDATE artists_extended
SET mbid_type = 'musicbrainz'
WHERE mbid IS NOT NULL;

-- Name uniqueness for MBID-less artists is no longer enforced by the database;
-- duplicates are handled differently from here on.
DROP INDEX IF EXISTS idx_artists_extended_name_unique;

CREATE INDEX idx_artists_extended_mbid_type
    ON artists_extended USING btree (mbid_type);
-- Artists plus a human-readable description of where their MBID comes from.
CREATE VIEW artists_with_type AS
SELECT
    ext.id,
    ext.mbid,
    ext.name,
    ext.mbid_type,
    ext.play_count,
    ext.created_at,
    ext.updated_at,
    -- Synthetic MBIDs show the name they are labelled as generated from;
    -- MusicBrainz rows show the MBID itself.
    CASE ext.mbid_type
        WHEN 'synthetic' THEN 'Generated from: ' || ext.name
        WHEN 'musicbrainz' THEN 'MusicBrainz: ' || ext.mbid::text
        ELSE 'No MBID available'
    END AS mbid_info
FROM artists_extended AS ext;
-- Rebuild the artist play-count view so it also exposes mbid_type.
-- Outer joins keep artists with no plays (count 0).
DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
CREATE MATERIALIZED VIEW mv_artist_play_counts (artist_id, artist_mbid, artist_name, mbid_type, play_count) AS
SELECT
    ext.id,
    ext.mbid,
    ext.name,
    ext.mbid_type,
    COUNT(pl.uri)
FROM artists_extended AS ext
LEFT OUTER JOIN play_to_artists_extended AS link ON link.artist_id = ext.id
LEFT OUTER JOIN plays AS pl ON pl.uri = link.play_uri
GROUP BY ext.id, ext.mbid, ext.name, ext.mbid_type;

CREATE UNIQUE INDEX idx_mv_artist_play_counts_with_type
    ON mv_artist_play_counts USING btree (artist_id);
-- Catalog comments explaining the synthetic MBID system.
COMMENT ON COLUMN artists_extended.mbid_type
    IS 'Type of MBID: musicbrainz (real), synthetic (generated), or unknown (legacy data)';

COMMENT ON COLUMN artists_extended.mbid
    IS 'MusicBrainz ID (for musicbrainz type) or synthetic UUID (for synthetic type)';

COMMENT ON VIEW artists_with_type
    IS 'View that provides human-readable information about artist MBID sources';
-- Derive a deterministic (name-based, version 5) UUID from an artist name.
-- The same input always yields the same UUID. The name is hashed exactly as
-- passed in: no trimming or case folding is applied here.
CREATE OR REPLACE FUNCTION generate_synthetic_mbid(artist_name TEXT) RETURNS UUID AS $$
DECLARE
    -- Namespace under which names are hashed (the DNS namespace UUID).
    dns_namespace CONSTANT UUID := '6ba7b810-9dad-11d1-80b4-00c04fd430c8';
BEGIN
    RETURN uuid_generate_v5(dns_namespace, artist_name);
END;
$$ LANGUAGE plpgsql IMMUTABLE;

COMMENT ON FUNCTION generate_synthetic_mbid
    IS 'Generates a deterministic UUID v5 for artist names without MusicBrainz IDs';
···11--- Migration to add fuzzy text matching capabilities
22--- This enables better artist name matching using trigram similarity
-- pg_trgm supplies similarity() and the gin_trgm_ops operator class.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Trigram GIN indexes over both the raw and the normalized artist name.
CREATE INDEX idx_artists_extended_name_trgm
    ON artists_extended
    USING gin (name gin_trgm_ops);
CREATE INDEX idx_artists_extended_name_normalized_trgm
    ON artists_extended
    USING gin (name_normalized gin_trgm_ops);
-- Score how likely two artist names refer to the same artist.
--
-- Parameters:
--   input_name, existing_name   - the two artist names to compare
--   input_album, existing_album - optional album names; used only when BOTH are given
-- Returns:
--   Trigram similarity of the trimmed, lower-cased names, raised to at least
--   0.95 when the names are identical once punctuation is removed. When both
--   albums are supplied the result is 80% name score + 20% album similarity.
--   Returns NULL if either name is NULL (similarity() yields NULL).
CREATE OR REPLACE FUNCTION calculate_artist_similarity(
    input_name TEXT,
    existing_name TEXT,
    input_album TEXT DEFAULT NULL,
    existing_album TEXT DEFAULT NULL
) RETURNS FLOAT AS $$
DECLARE
    name_similarity FLOAT;
    album_similarity FLOAT := 0.0;
    final_score FLOAT;
    input_key TEXT;
    existing_key TEXT;
BEGIN
    -- Calculate trigram similarity for artist names
    name_similarity := similarity(LOWER(TRIM(input_name)), LOWER(TRIM(existing_name)));

    -- Punctuation-insensitive comparison keys. [:alnum:] is used instead of
    -- a-zA-Z0-9 so that letters outside ASCII are kept rather than deleted.
    -- NOTE(review): which non-ASCII characters count as alphanumeric depends
    -- on the database locale - confirm for the deployment.
    input_key := LOWER(TRIM(regexp_replace(input_name, '[^[:alnum:][:space:]]', '', 'g')));
    existing_key := LOWER(TRIM(regexp_replace(existing_name, '[^[:alnum:][:space:]]', '', 'g')));

    -- Boost for exact matches after normalization. The key must be non-empty:
    -- otherwise two unrelated names that consist only of stripped characters
    -- (e.g. '!!!' and '???') would both collapse to '' and be treated as equal.
    IF input_key <> '' AND input_key = existing_key THEN
        name_similarity := GREATEST(name_similarity, 0.95);
    END IF;

    -- Factor in album similarity if both are provided
    IF input_album IS NOT NULL AND existing_album IS NOT NULL THEN
        album_similarity := similarity(LOWER(TRIM(input_album)), LOWER(TRIM(existing_album)));
        -- Weight: 80% name, 20% album
        final_score := (name_similarity * 0.8) + (album_similarity * 0.2);
    ELSE
        final_score := name_similarity;
    END IF;

    RETURN final_score;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Candidate duplicate artists: every ordered pair of distinct artists whose
-- normalized names have trigram similarity above 0.8, restricted to pairs
-- where the query side is synthetic or the match side is a MusicBrainz artist.
--
-- match_action is 'upgrade_to_mb' when the match side is a MusicBrainz artist
-- and 'merge_synthetic' otherwise.
-- NOTE(review): an earlier 'consolidate_to_mb' branch (query = musicbrainz,
-- match = synthetic) could never fire because the WHERE clause below excludes
-- exactly those pairs, so it has been removed. If that direction is wanted,
-- the filter has to be widened as well.
--
-- name_normalized is the stored LOWER(TRIM(name)), so it is used directly
-- instead of recomputing the expression. No DISTINCT is needed: each
-- (query_artist_id, match_artist_id) pair is produced exactly once.
CREATE VIEW fuzzy_artist_matches AS
SELECT
    ae1.id AS query_artist_id,
    ae1.name AS query_artist_name,
    ae1.mbid_type AS query_mbid_type,
    ae2.id AS match_artist_id,
    ae2.name AS match_artist_name,
    ae2.mbid AS match_mbid,
    ae2.mbid_type AS match_mbid_type,
    similarity(ae1.name_normalized, ae2.name_normalized) AS name_similarity,
    CASE
        WHEN ae2.mbid_type = 'musicbrainz' THEN 'upgrade_to_mb'
        ELSE 'merge_synthetic'
    END AS match_action
FROM artists_extended AS ae1
INNER JOIN artists_extended AS ae2
    ON ae1.id <> ae2.id
WHERE similarity(ae1.name_normalized, ae2.name_normalized) > 0.8
  AND (
      ae1.mbid_type = 'synthetic' OR ae2.mbid_type = 'musicbrainz'
  );
-- Catalog comments for the fuzzy-matching objects.
COMMENT ON EXTENSION pg_trgm
    IS 'Trigram extension for fuzzy text matching';

COMMENT ON INDEX idx_artists_extended_name_trgm
    IS 'GIN index for trigram similarity on artist names';

COMMENT ON FUNCTION calculate_artist_similarity
    IS 'Calculates similarity score between artists considering name and optional album context';

COMMENT ON VIEW fuzzy_artist_matches
    IS 'Shows potential artist matches with confidence scores and recommended actions';
-- Suggest artist consolidations from fuzzy_artist_matches.
--
-- Parameters:
--   min_similarity - minimum name similarity (inclusive) for a pair to be listed
-- Returns one row per match whose action is 'upgrade_to_mb':
--   action           - the view's match_action
--   synthetic_artist - name of the query-side artist
--   target_artist    - name of the match-side artist
--   similarity_score - trigram similarity of the two names
--   synthetic_plays  - number of play links for the query-side artist
--   target_plays     - number of play links for the match-side artist
-- Rows are ordered by similarity, then by synthetic_plays, both descending.
CREATE OR REPLACE FUNCTION suggest_artist_consolidations(min_similarity FLOAT DEFAULT 0.9)
RETURNS TABLE(
    action TEXT,
    synthetic_artist TEXT,
    target_artist TEXT,
    similarity_score FLOAT,
    synthetic_plays INTEGER,
    target_plays INTEGER
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        fam.match_action AS action,
        fam.query_artist_name AS synthetic_artist,
        fam.match_artist_name AS target_artist,
        -- similarity() returns real; RETURN QUERY requires the column type to
        -- match the declared FLOAT (double precision) exactly, so cast here.
        fam.name_similarity::DOUBLE PRECISION AS similarity_score,
        (SELECT COUNT(*)::INTEGER
           FROM play_to_artists_extended AS q_links
          WHERE q_links.artist_id = fam.query_artist_id) AS synthetic_plays,
        (SELECT COUNT(*)::INTEGER
           FROM play_to_artists_extended AS m_links
          WHERE m_links.artist_id = fam.match_artist_id) AS target_plays
    FROM fuzzy_artist_matches AS fam
    WHERE fam.name_similarity >= min_similarity
      AND fam.match_action = 'upgrade_to_mb'
    ORDER BY fam.name_similarity DESC, synthetic_plays DESC;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION suggest_artist_consolidations IS 'Returns suggestions for consolidating synthetic artists with MusicBrainz artists based on similarity';
···11--- Migration to add discriminant fields for track and release variants
22--- This enables proper handling of different versions while maintaining grouping capabilities
-- Discriminant columns: free-text variant descriptors on plays, releases and recordings.
ALTER TABLE plays
    ADD COLUMN track_discriminant text,
    ADD COLUMN release_discriminant text;

ALTER TABLE releases
    ADD COLUMN discriminant text;

ALTER TABLE recordings
    ADD COLUMN discriminant text;

-- Single-column indexes for searching and filtering by discriminant.
CREATE INDEX idx_plays_track_discriminant   ON plays USING btree (track_discriminant);
CREATE INDEX idx_plays_release_discriminant ON plays USING btree (release_discriminant);
CREATE INDEX idx_releases_discriminant      ON releases USING btree (discriminant);
CREATE INDEX idx_recordings_discriminant    ON recordings USING btree (discriminant);

-- Composite indexes for grouping by base name + discriminant.
CREATE INDEX idx_plays_track_name_discriminant
    ON plays USING btree (track_name, track_discriminant);
CREATE INDEX idx_plays_release_name_discriminant
    ON plays USING btree (release_name, release_discriminant);
-- Rebuild the release play-count view so it also exposes the release discriminant.
-- The outer join keeps releases with no plays (count 0).
DROP MATERIALIZED VIEW IF EXISTS mv_release_play_counts;
CREATE MATERIALIZED VIEW mv_release_play_counts (release_mbid, release_name, release_discriminant, play_count) AS
SELECT
    rel.mbid,
    rel.name,
    rel.discriminant,
    COUNT(pl.uri)
FROM releases AS rel
LEFT OUTER JOIN plays AS pl ON pl.release_mbid = rel.mbid
GROUP BY rel.mbid, rel.name, rel.discriminant;

CREATE UNIQUE INDEX idx_mv_release_play_counts_discriminant
    ON mv_release_play_counts USING btree (release_mbid);
-- Rebuild the recording play-count view so it also exposes the recording discriminant.
-- The outer join keeps recordings with no plays (count 0).
DROP MATERIALIZED VIEW IF EXISTS mv_recording_play_counts;
CREATE MATERIALIZED VIEW mv_recording_play_counts (recording_mbid, recording_name, recording_discriminant, play_count) AS
SELECT
    trk.mbid,
    trk.name,
    trk.discriminant,
    COUNT(pl.uri)
FROM recordings AS trk
LEFT OUTER JOIN plays AS pl ON pl.recording_mbid = trk.mbid
GROUP BY trk.mbid, trk.name, trk.discriminant;

CREATE UNIQUE INDEX idx_mv_recording_play_counts_discriminant
    ON mv_recording_play_counts USING btree (recording_mbid);
-- Track variants: one row per (track_name, track_discriminant) with play,
-- listener and distinct-recording counts; most-played variant first per track.
CREATE VIEW track_variants (track_name, track_discriminant, play_count, unique_listeners, unique_recordings) AS
SELECT
    pl.track_name,
    pl.track_discriminant,
    COUNT(*),
    COUNT(DISTINCT pl.did),
    COUNT(DISTINCT pl.recording_mbid)
FROM plays AS pl
WHERE pl.track_name IS NOT NULL
GROUP BY pl.track_name, pl.track_discriminant
ORDER BY pl.track_name, COUNT(*) DESC;
-- Release variants: one row per (release_name, release_discriminant) with play,
-- listener and distinct-release counts; plays without a release name are skipped.
CREATE VIEW release_variants (release_name, release_discriminant, play_count, unique_listeners, unique_releases) AS
SELECT
    pl.release_name,
    pl.release_discriminant,
    COUNT(*),
    COUNT(DISTINCT pl.did),
    COUNT(DISTINCT pl.release_mbid)
FROM plays AS pl
WHERE pl.release_name IS NOT NULL
GROUP BY pl.release_name, pl.release_discriminant
ORDER BY pl.release_name, COUNT(*) DESC;
-- Extract variant information ("discriminant") from a track or release name.
--
-- Looks for a bracketed group - (...), [...] or {...}, tried in that order -
-- whose text contains one of the known variant keywords, and returns the
-- trimmed text inside the delimiters. Returns NULL when nothing matches or
-- when name_text is NULL.
--
-- Matching is case-insensitive, consistent with get_base_name's 'gi' flags,
-- so "(Deluxe Edition)" is recognized as well as "(deluxe edition)".
-- The tail of each pattern is a negated bracket class rather than ".*?":
-- the leading greedy quantifier makes the whole regex greedy, so ".*?"
-- would run on to the LAST closing delimiter in the name and swallow
-- unrelated groups, e.g. "Song (Live) (feat. X)" -> "Live) (feat. X".
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Capture group 1 is the text between the delimiters.
    discriminant_patterns TEXT[] := ARRAY[
        '\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus)[^)]*)\)',
        '\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus)[^]]*)\]',
        '\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus)[^}]*)\}'
    ];
    pattern TEXT;
    match_result TEXT;
BEGIN
    -- Try each pattern to find discriminant information
    FOREACH pattern IN ARRAY discriminant_patterns
    LOOP
        -- regexp_match yields NULL on no match; subscripting NULL gives NULL.
        match_result := (regexp_match(name_text, pattern, 'i'))[1];
        IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
            RETURN trim(match_result);
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Return a track or release name with variant information removed.
--
-- Every (...), [...] or {...} group that contains one of the known variant
-- keywords (case-insensitive) is replaced by a single space; the result is
-- then trimmed and runs of whitespace are collapsed. Returns NULL for NULL.
--
-- The tail of each pattern is a negated bracket class rather than ".*?":
-- the leading greedy quantifier makes the whole regex greedy, so ".*?"
-- would delete everything up to the LAST closing delimiter in the name,
-- e.g. "Song (Live) at (Home)" -> "Song" instead of "Song at (Home)".
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    cleanup_patterns TEXT[] := ARRAY[
        '\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus)[^)]*\)\s*',
        '\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus)[^]]*\]\s*',
        '\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus)[^}]*\}\s*'
    ];
    pattern TEXT;
    result_text TEXT := name_text;
BEGIN
    -- Remove discriminant patterns to get base name
    FOREACH pattern IN ARRAY cleanup_patterns
    LOOP
        result_text := regexp_replace(result_text, pattern, ' ', 'gi');
    END LOOP;

    -- Clean up extra whitespace
    result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');

    RETURN result_text;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Catalog comments explaining the discriminant system: columns first ...
COMMENT ON COLUMN plays.track_discriminant
    IS 'Distinguishing information for track variants (e.g., "Acoustic Version", "Live at Wembley", "Radio Edit")';
COMMENT ON COLUMN plays.release_discriminant
    IS 'Distinguishing information for release variants (e.g., "Deluxe Edition", "Remastered", "2023 Remaster")';
COMMENT ON COLUMN releases.discriminant
    IS 'Distinguishing information for release variants to enable proper grouping';
COMMENT ON COLUMN recordings.discriminant
    IS 'Distinguishing information for recording variants to enable proper grouping';

-- ... then the variant views ...
COMMENT ON VIEW track_variants
    IS 'Shows all variants of tracks with their play counts and unique listeners';
COMMENT ON VIEW release_variants
    IS 'Shows all variants of releases with their play counts and unique listeners';

-- ... and the helper functions.
COMMENT ON FUNCTION extract_discriminant
    IS 'Extracts discriminant information from track/release names for migration purposes';
COMMENT ON FUNCTION get_base_name
    IS 'Returns the base name without discriminant information for grouping purposes';
···11--- Enhanced discriminant extraction with comprehensive edition/version patterns
22--- This migration improves the auto-population of discriminants for better metadata handling
-- Remove the previous helper functions before the enhanced versions are created.
DROP FUNCTION IF EXISTS
    extract_discriminant(text),
    get_base_name(text);
-- Extract the discriminant (edition, version, remaster, volume, featured-artist
-- or soundtrack qualifier) embedded in a track or release name.
--
-- Parameters:
--   name_text  raw name to inspect.
-- Returns:
--   The first qualifier found, trimmed and stripped of leading/trailing
--   non-word characters; NULL when name_text is NULL/blank or nothing matches.
--
-- Patterns are tried in order: (...) groups, [...] groups, {...} groups, a
-- trailing "- qualifier" suffix, then a trailing ": qualifier" suffix.
-- Matching is case-insensitive.
-- Keywords match as substrings (there are no word-boundary anchors), so short
-- ones such as "ep" or "pro" also hit inside longer words.
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Keyword alternations shared by the pattern families below.
    -- edition_words: used by the colon suffix pattern.
    edition_words CONSTANT TEXT := 'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive';
    -- media_words: edition_words plus media formats; used by the dash suffix pattern.
    media_words CONSTANT TEXT := edition_words || '|digital|vinyl|cd|dvd|blu-ray';
    -- all_words: the full list; used inside (), [] and {} groups.
    all_words CONSTANT TEXT := media_words || '|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative';

    year_tag   CONSTANT TEXT := '(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)';
    volume_tag CONSTANT TEXT := '(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+';
    credit_tag CONSTANT TEXT := '(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+';
    source_tag CONSTANT TEXT := '(?:from|soundtrack|ost|score|theme)';

    -- Each delimited pattern is bounded by the negated class of its closing
    -- delimiter. A trailing lazy ".*?" is not enough: PostgreSQL decides
    -- greediness for the whole expression from its first quantified atom,
    -- so the match would otherwise extend to the LAST closing delimiter.
    discriminant_patterns TEXT[] := ARRAY[
        -- Parentheses
        '(?i)\(([^)]*(?:' || all_words || ')[^)]*)\)',
        '(?i)\(([^)]*' || year_tag   || '[^)]*)\)',
        '(?i)\(([^)]*' || volume_tag || '[^)]*)\)',
        '(?i)\(([^)]*' || credit_tag || '[^)]*)\)',
        '(?i)\(([^)]*' || source_tag || '[^)]*)\)',

        -- Square brackets
        '(?i)\[([^]]*(?:' || all_words || ')[^]]*)\]',
        '(?i)\[([^]]*' || year_tag   || '[^]]*)\]',
        '(?i)\[([^]]*' || volume_tag || '[^]]*)\]',
        '(?i)\[([^]]*' || credit_tag || '[^]]*)\]',
        '(?i)\[([^]]*' || source_tag || '[^]]*)\]',

        -- Braces
        '(?i)\{([^}]*(?:' || all_words || ')[^}]*)\}',
        '(?i)\{([^}]*' || year_tag   || '[^}]*)\}',
        '(?i)\{([^}]*' || volume_tag || '[^}]*)\}',
        '(?i)\{([^}]*' || credit_tag || '[^}]*)\}',
        '(?i)\{([^}]*' || source_tag || '[^}]*)\}',

        -- Trailing "- qualifier" (hyphen, en dash or em dash); runs to end of string
        '(?i)[-–—]\s*([^-–—]*(?:' || media_words || ').*)$',
        -- The capture covers the year AND its keyword, e.g. "2011 Mix"
        '(?i)[-–—]\s*(' || year_tag || '.*)$',

        -- Trailing ": qualifier"; runs to end of string
        '(?i):\s*([^:]*(?:' || edition_words || ').*)$',
        '(?i):\s*(' || year_tag || '.*)$'
    ];

    candidate_re TEXT;
    match_result TEXT;
BEGIN
    -- Nothing to extract from NULL or blank input
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN NULL;
    END IF;

    -- First pattern that yields a non-empty capture wins
    FOREACH candidate_re IN ARRAY discriminant_patterns
    LOOP
        -- substring(... FROM regex) returns the first capturing group
        match_result := substring(name_text FROM candidate_re COLLATE "C");
        IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
            -- Trim, then strip leading/trailing non-word characters
            match_result := regexp_replace(trim(match_result), '^[^\w]+|[^\w]+$', '', 'g');
            IF length(trim(match_result)) > 0 THEN
                RETURN match_result;
            END IF;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Return a name with its discriminant qualifiers removed, so that different
-- editions/versions of the same title share one base name.
--
-- Parameters:
--   name_text  raw track or release name.
-- Returns:
--   The name with qualifying (...), [...], {...} groups and trailing
--   "- qualifier" / ": qualifier" suffixes removed, whitespace collapsed and a
--   single trailing , ; : or dash stripped. NULL/blank input is returned
--   unchanged, and the original name is returned when removal would leave
--   nothing.
--
-- Matching is case-insensitive (regexp_replace flag 'i'). Keywords match as
-- substrings; there are no word-boundary anchors.
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Keyword alternations shared by the pattern families below.
    -- edition_words: used by the colon suffix pattern.
    edition_words CONSTANT TEXT := 'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive';
    -- media_words: edition_words plus media formats; used by the dash suffix pattern.
    media_words CONSTANT TEXT := edition_words || '|digital|vinyl|cd|dvd|blu-ray';
    -- all_words: the full list; used inside (), [] and {} groups.
    all_words CONSTANT TEXT := media_words || '|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative';

    year_tag   CONSTANT TEXT := '(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)';
    volume_tag CONSTANT TEXT := '(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+';
    credit_tag CONSTANT TEXT := '(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+';
    source_tag CONSTANT TEXT := '(?:from|soundtrack|ost|score|theme)';

    -- Each delimited pattern is bounded by the negated class of its closing
    -- delimiter. These expressions start with a greedy "\s*", which makes the
    -- whole expression greedy in PostgreSQL; a trailing lazy ".*?" would
    -- therefore remove everything up to the LAST closing delimiter in the name.
    cleanup_patterns TEXT[] := ARRAY[
        -- Parentheses
        '\s*\([^)]*(?:' || all_words || ')[^)]*\)\s*',
        '\s*\([^)]*' || year_tag   || '[^)]*\)\s*',
        '\s*\([^)]*' || volume_tag || '[^)]*\)\s*',
        '\s*\([^)]*' || credit_tag || '[^)]*\)\s*',
        '\s*\([^)]*' || source_tag || '[^)]*\)\s*',

        -- Square brackets
        '\s*\[[^]]*(?:' || all_words || ')[^]]*\]\s*',
        '\s*\[[^]]*' || year_tag   || '[^]]*\]\s*',
        '\s*\[[^]]*' || volume_tag || '[^]]*\]\s*',
        '\s*\[[^]]*' || credit_tag || '[^]]*\]\s*',
        '\s*\[[^]]*' || source_tag || '[^]]*\]\s*',

        -- Braces
        '\s*\{[^}]*(?:' || all_words || ')[^}]*\}\s*',
        '\s*\{[^}]*' || year_tag   || '[^}]*\}\s*',
        '\s*\{[^}]*' || volume_tag || '[^}]*\}\s*',
        '\s*\{[^}]*' || credit_tag || '[^}]*\}\s*',
        '\s*\{[^}]*' || source_tag || '[^}]*\}\s*',

        -- Trailing "- qualifier" (hyphen, en dash or em dash); removed to end of string
        '\s*[-–—]\s*[^-–—]*(?:' || media_words || ').*$',
        '\s*[-–—]\s*' || year_tag || '.*$',

        -- Trailing ": qualifier"; removed to end of string
        '\s*:\s*[^:]*(?:' || edition_words || ').*$',
        '\s*:\s*' || year_tag || '.*$'
    ];

    cleanup_re  TEXT;
    result_text TEXT := name_text;
BEGIN
    -- NULL or blank input is handed back untouched
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN name_text;
    END IF;

    -- Replace every qualifier with a single space so neighbouring words stay apart
    FOREACH cleanup_re IN ARRAY cleanup_patterns
    LOOP
        result_text := regexp_replace(result_text, cleanup_re, ' ', 'gi');
    END LOOP;

    -- Collapse runs of whitespace left behind by the removals
    result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');

    -- Drop one dangling separator at the end, then trim again
    result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
    result_text := trim(result_text);

    -- Never return an empty base name; fall back to the input
    IF length(result_text) = 0 THEN
        RETURN name_text;
    END IF;

    RETURN result_text;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Extract only an edition / version / remaster qualifier from a name.
--
-- Parameters:
--   name_text  raw track or release name.
-- Returns:
--   The first matching qualifier, trimmed and stripped of leading/trailing
--   non-word characters; NULL when name_text is NULL/blank or nothing matches.
--
-- Keyword families are tried in priority order (edition, version, remaster,
-- then "<year> <keyword>"), each across (...), [...], {...} and, for the first
-- three families, a trailing "- ..." or ": ..." suffix.
-- Matching is case-insensitive so that title-cased qualifiers such as
-- "Deluxe Edition" are recognised.
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    edition_patterns TEXT[] := ARRAY[
        -- Edition
        '(?i)\(([^)]*edition[^)]*)\)',
        '(?i)\[([^]]*edition[^]]*)\]',
        '(?i)\{([^}]*edition[^}]*)\}',
        '(?i)[-–—]\s*([^-–—]*edition[^-–—]*)$',
        '(?i):\s*([^:]*edition[^:]*)$',

        -- Version
        '(?i)\(([^)]*version[^)]*)\)',
        '(?i)\[([^]]*version[^]]*)\]',
        '(?i)\{([^}]*version[^}]*)\}',
        '(?i)[-–—]\s*([^-–—]*version[^-–—]*)$',
        '(?i):\s*([^:]*version[^:]*)$',

        -- Remaster
        '(?i)\(([^)]*remaster[^)]*)\)',
        '(?i)\[([^]]*remaster[^]]*)\]',
        '(?i)\{([^}]*remaster[^}]*)\}',
        '(?i)[-–—]\s*([^-–—]*remaster[^-–—]*)$',
        '(?i):\s*([^:]*remaster[^:]*)$',

        -- Two- or four-digit year followed by a keyword
        '(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
        '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
        '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
    ];

    candidate_re TEXT;
    match_result TEXT;
BEGIN
    -- Nothing to extract from NULL or blank input
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN NULL;
    END IF;

    -- First pattern that yields a non-empty capture wins
    FOREACH candidate_re IN ARRAY edition_patterns
    LOOP
        -- substring(... FROM regex) returns the first capturing group
        match_result := substring(name_text FROM candidate_re COLLATE "C");
        IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
            -- Trim, then strip leading/trailing non-word characters
            match_result := regexp_replace(trim(match_result), '^[^\w]+|[^\w]+$', '', 'g');
            IF length(trim(match_result)) > 0 THEN
                RETURN match_result;
            END IF;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Backfill recordings: store the discriminant parsed from the name, touching
-- only rows that have none yet and whose name actually yields one.
UPDATE recordings AS rec
SET discriminant = extract_discriminant(rec.name)
WHERE rec.discriminant IS NULL
  AND extract_discriminant(rec.name) IS NOT NULL;

-- Backfill releases the same way.
UPDATE releases AS rel
SET discriminant = extract_discriminant(rel.name)
WHERE rel.discriminant IS NULL
  AND extract_discriminant(rel.name) IS NOT NULL;

-- Backfill plays: track discriminant from track_name ...
UPDATE plays AS p
SET track_discriminant = extract_discriminant(p.track_name)
WHERE p.track_discriminant IS NULL
  AND extract_discriminant(p.track_name) IS NOT NULL;

-- ... and release discriminant from release_name (which may be NULL).
UPDATE plays AS p
SET release_discriminant = extract_discriminant(p.release_name)
WHERE p.release_discriminant IS NULL
  AND p.release_name IS NOT NULL
  AND extract_discriminant(p.release_name) IS NOT NULL;

-- Composite indexes for lookups by name together with discriminant.
CREATE INDEX IF NOT EXISTS idx_recordings_name_discriminant
    ON recordings USING btree (name, discriminant);
CREATE INDEX IF NOT EXISTS idx_releases_name_discriminant
    ON releases USING btree (name, discriminant);

-- Catalog descriptions for the three helper functions.
COMMENT ON FUNCTION extract_discriminant
    IS 'Enhanced discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
COMMENT ON FUNCTION get_base_name
    IS 'Enhanced base name extraction removing comprehensive discriminant patterns to enable proper grouping';
COMMENT ON FUNCTION extract_edition_discriminant
    IS 'Specialized function for extracting edition and version discriminants with focused patterns';
-- Analysis view: for every named recording and release, show the stored
-- discriminant next to what the extraction functions currently compute.
-- Columns: table_name, original_name, discriminant, base_name,
--          extracted_discriminant, edition_discriminant.
CREATE OR REPLACE VIEW discriminant_analysis AS
SELECT
    src.table_name,
    src.name                               AS original_name,
    src.discriminant,
    get_base_name(src.name)                AS base_name,
    extract_discriminant(src.name)         AS extracted_discriminant,
    extract_edition_discriminant(src.name) AS edition_discriminant
FROM (
    -- recordings first, then releases, tagged with their source table
    SELECT 'recordings' AS table_name, name, discriminant
    FROM recordings
    UNION ALL
    SELECT 'releases' AS table_name, name, discriminant
    FROM releases
) AS src
WHERE src.name IS NOT NULL;

COMMENT ON VIEW discriminant_analysis
    IS 'Analysis view showing discriminant extraction results for quality assessment and debugging';
-- Refresh materialized views to include discriminant information
REFRESH MATERIALIZED VIEW mv_release_play_counts;
REFRESH MATERIALIZED VIEW mv_recording_play_counts;

-- Summary statistics for discriminant usage, one row per entity type.
-- Columns:
--   entity_type               'recordings' or 'releases'
--   total_count               number of rows in the table
--   with_discriminant         rows whose discriminant is set
--   extractable_discriminant  rows without a discriminant whose name yields one
--   discriminant_percentage   with_discriminant as a percentage of total_count,
--                             rounded to 2 decimals; NULL when the table is empty
CREATE OR REPLACE VIEW discriminant_stats AS
SELECT
    'recordings' AS entity_type,
    COUNT(*) AS total_count,
    COUNT(discriminant) AS with_discriminant,
    COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) AS extractable_discriminant,
    -- NULLIF guards the division: an aggregate over an empty table still
    -- produces one row with COUNT(*) = 0, which would raise division by zero.
    ROUND(COUNT(discriminant) * 100.0 / NULLIF(COUNT(*), 0), 2) AS discriminant_percentage
FROM recordings
UNION ALL
SELECT
    'releases' AS entity_type,
    COUNT(*) AS total_count,
    COUNT(discriminant) AS with_discriminant,
    COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) AS extractable_discriminant,
    ROUND(COUNT(discriminant) * 100.0 / NULLIF(COUNT(*), 0), 2) AS discriminant_percentage
FROM releases;

COMMENT ON VIEW discriminant_stats IS 'Statistics showing discriminant usage and extraction potential across entity types';
-- Fix case sensitivity in discriminant extraction patterns.
-- This migration replaces the discriminant extraction functions with versions
-- that match case-insensitively.

-- Both views call the functions dropped below, so they are removed first.
-- NOTE(review): only discriminant_analysis is recreated in the visible part of
-- this migration; confirm discriminant_stats is recreated elsewhere if needed.
DROP VIEW IF EXISTS discriminant_analysis, discriminant_stats CASCADE;

-- Remove the existing functions before recreating them.
-- NOTE(review): CASCADE also drops any other object that depends on these
-- functions; verify nothing else relies on them.
DROP FUNCTION IF EXISTS
    extract_discriminant(TEXT),
    get_base_name(TEXT),
    extract_edition_discriminant(TEXT)
CASCADE;
-- Extract the discriminant (edition, version, remaster, volume, featured-artist
-- or soundtrack qualifier) embedded in a track or release name, matching
-- case-insensitively.
--
-- Parameters:
--   name_text  raw name to inspect.
-- Returns:
--   The first qualifier found, trimmed and stripped of leading/trailing
--   non-word characters; NULL when name_text is NULL/blank or nothing matches.
--
-- Patterns are tried in order: (...) groups, [...] groups, {...} groups, a
-- trailing "- qualifier" suffix, then a trailing ": qualifier" suffix.
-- Keywords match as substrings (there are no word-boundary anchors), so short
-- ones such as "ep" or "pro" also hit inside longer words.
CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Keyword alternations shared by the pattern families below.
    -- edition_words: used by the colon suffix pattern.
    edition_words CONSTANT TEXT := 'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive';
    -- media_words: edition_words plus media formats; used by the dash suffix pattern.
    media_words CONSTANT TEXT := edition_words || '|digital|vinyl|cd|dvd|blu-ray';
    -- all_words: the full list; used inside (), [] and {} groups.
    all_words CONSTANT TEXT := media_words || '|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative';

    year_tag   CONSTANT TEXT := '(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)';
    volume_tag CONSTANT TEXT := '(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+';
    credit_tag CONSTANT TEXT := '(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+';
    source_tag CONSTANT TEXT := '(?:from|soundtrack|ost|score|theme)';

    -- Each delimited pattern is bounded by the negated class of its closing
    -- delimiter. A trailing lazy ".*?" is not enough: PostgreSQL decides
    -- greediness for the whole expression from its first quantified atom,
    -- so the match would otherwise extend to the LAST closing delimiter.
    discriminant_patterns TEXT[] := ARRAY[
        -- Parentheses
        '(?i)\(([^)]*(?:' || all_words || ')[^)]*)\)',
        '(?i)\(([^)]*' || year_tag   || '[^)]*)\)',
        '(?i)\(([^)]*' || volume_tag || '[^)]*)\)',
        '(?i)\(([^)]*' || credit_tag || '[^)]*)\)',
        '(?i)\(([^)]*' || source_tag || '[^)]*)\)',

        -- Square brackets
        '(?i)\[([^]]*(?:' || all_words || ')[^]]*)\]',
        '(?i)\[([^]]*' || year_tag   || '[^]]*)\]',
        '(?i)\[([^]]*' || volume_tag || '[^]]*)\]',
        '(?i)\[([^]]*' || credit_tag || '[^]]*)\]',
        '(?i)\[([^]]*' || source_tag || '[^]]*)\]',

        -- Braces
        '(?i)\{([^}]*(?:' || all_words || ')[^}]*)\}',
        '(?i)\{([^}]*' || year_tag   || '[^}]*)\}',
        '(?i)\{([^}]*' || volume_tag || '[^}]*)\}',
        '(?i)\{([^}]*' || credit_tag || '[^}]*)\}',
        '(?i)\{([^}]*' || source_tag || '[^}]*)\}',

        -- Trailing "- qualifier" (hyphen, en dash or em dash); runs to end of string
        '(?i)[-–—]\s*([^-–—]*(?:' || media_words || ').*)$',
        -- The capture covers the year AND its keyword, e.g. "2011 Mix"
        '(?i)[-–—]\s*(' || year_tag || '.*)$',

        -- Trailing ": qualifier"; runs to end of string
        '(?i):\s*([^:]*(?:' || edition_words || ').*)$',
        '(?i):\s*(' || year_tag || '.*)$'
    ];

    candidate_re TEXT;
    match_result TEXT;
BEGIN
    -- Nothing to extract from NULL or blank input
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN NULL;
    END IF;

    -- First pattern that yields a non-empty capture wins
    FOREACH candidate_re IN ARRAY discriminant_patterns
    LOOP
        -- substring(... FROM regex) returns the first capturing group
        match_result := substring(name_text FROM candidate_re);
        IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
            -- Trim, then strip leading/trailing non-word characters
            match_result := regexp_replace(trim(match_result), '^[^\w]+|[^\w]+$', '', 'g');
            IF length(trim(match_result)) > 0 THEN
                RETURN match_result;
            END IF;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Return a name with its discriminant qualifiers removed (case-insensitive),
-- so that different editions/versions of the same title share one base name.
--
-- Parameters:
--   name_text  raw track or release name.
-- Returns:
--   The name with qualifying (...), [...], {...} groups and trailing
--   "- qualifier" / ": qualifier" suffixes removed, whitespace collapsed and a
--   single trailing , ; : or dash stripped. NULL/blank input is returned
--   unchanged, and the original name is returned when removal would leave
--   nothing.
--
-- Keywords match as substrings; there are no word-boundary anchors.
CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    -- Keyword alternations shared by the pattern families below.
    -- edition_words: used by the colon suffix pattern.
    edition_words CONSTANT TEXT := 'deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive';
    -- media_words: edition_words plus media formats; used by the dash suffix pattern.
    media_words CONSTANT TEXT := edition_words || '|digital|vinyl|cd|dvd|blu-ray';
    -- all_words: the full list; used inside (), [] and {} groups.
    all_words CONSTANT TEXT := media_words || '|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative';

    year_tag   CONSTANT TEXT := '(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)';
    volume_tag CONSTANT TEXT := '(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+';
    credit_tag CONSTANT TEXT := '(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+';
    source_tag CONSTANT TEXT := '(?:from|soundtrack|ost|score|theme)';

    -- Each delimited pattern is bounded by the negated class of its closing
    -- delimiter. These expressions start with a greedy "\s*", which makes the
    -- whole expression greedy in PostgreSQL; a trailing lazy ".*?" would
    -- therefore remove everything up to the LAST closing delimiter in the name.
    cleanup_patterns TEXT[] := ARRAY[
        -- Parentheses
        '(?i)\s*\([^)]*(?:' || all_words || ')[^)]*\)\s*',
        '(?i)\s*\([^)]*' || year_tag   || '[^)]*\)\s*',
        '(?i)\s*\([^)]*' || volume_tag || '[^)]*\)\s*',
        '(?i)\s*\([^)]*' || credit_tag || '[^)]*\)\s*',
        '(?i)\s*\([^)]*' || source_tag || '[^)]*\)\s*',

        -- Square brackets
        '(?i)\s*\[[^]]*(?:' || all_words || ')[^]]*\]\s*',
        '(?i)\s*\[[^]]*' || year_tag   || '[^]]*\]\s*',
        '(?i)\s*\[[^]]*' || volume_tag || '[^]]*\]\s*',
        '(?i)\s*\[[^]]*' || credit_tag || '[^]]*\]\s*',
        '(?i)\s*\[[^]]*' || source_tag || '[^]]*\]\s*',

        -- Braces
        '(?i)\s*\{[^}]*(?:' || all_words || ')[^}]*\}\s*',
        '(?i)\s*\{[^}]*' || year_tag   || '[^}]*\}\s*',
        '(?i)\s*\{[^}]*' || volume_tag || '[^}]*\}\s*',
        '(?i)\s*\{[^}]*' || credit_tag || '[^}]*\}\s*',
        '(?i)\s*\{[^}]*' || source_tag || '[^}]*\}\s*',

        -- Trailing "- qualifier" (hyphen, en dash or em dash); removed to end of string
        '(?i)\s*[-–—]\s*[^-–—]*(?:' || media_words || ').*$',
        '(?i)\s*[-–—]\s*' || year_tag || '.*$',

        -- Trailing ": qualifier"; removed to end of string
        '(?i)\s*:\s*[^:]*(?:' || edition_words || ').*$',
        '(?i)\s*:\s*' || year_tag || '.*$'
    ];

    cleanup_re  TEXT;
    result_text TEXT := name_text;
BEGIN
    -- NULL or blank input is handed back untouched
    IF name_text IS NULL OR trim(name_text) = '' THEN
        RETURN name_text;
    END IF;

    -- Replace every qualifier with a single space so neighbouring words stay apart
    FOREACH cleanup_re IN ARRAY cleanup_patterns
    LOOP
        result_text := regexp_replace(result_text, cleanup_re, ' ', 'g');
    END LOOP;

    -- Collapse runs of whitespace left behind by the removals
    result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');

    -- Drop one dangling separator at the end, then trim again
    result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g');
    result_text := trim(result_text);

    -- Never return an empty base name; fall back to the input
    IF length(result_text) = 0 THEN
        RETURN name_text;
    END IF;

    RETURN result_text;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Extract only an edition / version / remaster qualifier from a name, matching
-- case-insensitively.
--
-- Parameters:
--   name_text  raw track or release name.
-- Returns:
--   The first matching qualifier, trimmed and stripped of leading/trailing
--   non-word characters; NULL when name_text is NULL/blank or nothing matches.
--
-- Keyword families are tried in priority order (edition, version, remaster,
-- then "<year> <keyword>"), each across (...), [...], {...} and, for the first
-- three families, a trailing "- ..." or ": ..." suffix.
CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$
DECLARE
    edition_patterns TEXT[] := ARRAY[
        -- Edition
        '(?i)\(([^)]*edition[^)]*)\)',
        '(?i)\[([^]]*edition[^]]*)\]',
        '(?i)\{([^}]*edition[^}]*)\}',
        '(?i)[-–—]\s*([^-–—]*edition[^-–—]*)$',
        '(?i):\s*([^:]*edition[^:]*)$',

        -- Version
        '(?i)\(([^)]*version[^)]*)\)',
        '(?i)\[([^]]*version[^]]*)\]',
        '(?i)\{([^}]*version[^}]*)\}',
        '(?i)[-–—]\s*([^-–—]*version[^-–—]*)$',
        '(?i):\s*([^:]*version[^:]*)$',

        -- Remaster
        '(?i)\(([^)]*remaster[^)]*)\)',
        '(?i)\[([^]]*remaster[^]]*)\]',
        '(?i)\{([^}]*remaster[^}]*)\}',
        '(?i)[-–—]\s*([^-–—]*remaster[^-–—]*)$',
        '(?i):\s*([^:]*remaster[^:]*)$',

        -- Two- or four-digit year followed by a keyword
        '(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)',
        '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]',
        '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}'
    ];

    idx       INTEGER;
    candidate TEXT;
BEGIN
    -- NULL or blank input has no qualifier
    IF coalesce(trim(name_text), '') = '' THEN
        RETURN NULL;
    END IF;

    -- Walk the patterns in priority order; the first usable capture is returned
    FOR idx IN 1 .. array_length(edition_patterns, 1)
    LOOP
        -- substring(... FROM regex) returns the first capturing group
        candidate := substring(name_text FROM edition_patterns[idx]);

        -- No match, or a capture made only of whitespace: try the next pattern
        CONTINUE WHEN candidate IS NULL OR length(trim(candidate)) = 0;

        -- Trim, then strip leading/trailing non-word characters
        candidate := regexp_replace(trim(candidate), '^[^\w]+|[^\w]+$', '', 'g');
        IF length(trim(candidate)) > 0 THEN
            RETURN candidate;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Re-run the backfill with the case-insensitive functions. Only rows that
-- still have no discriminant and whose name yields one are touched.
UPDATE recordings AS rec
SET discriminant = extract_discriminant(rec.name)
WHERE rec.discriminant IS NULL
  AND extract_discriminant(rec.name) IS NOT NULL;

UPDATE releases AS rel
SET discriminant = extract_discriminant(rel.name)
WHERE rel.discriminant IS NULL
  AND extract_discriminant(rel.name) IS NOT NULL;

-- plays: track discriminant from track_name
UPDATE plays AS p
SET track_discriminant = extract_discriminant(p.track_name)
WHERE p.track_discriminant IS NULL
  AND extract_discriminant(p.track_name) IS NOT NULL;

-- plays: release discriminant from release_name (which may be NULL)
UPDATE plays AS p
SET release_discriminant = extract_discriminant(p.release_name)
WHERE p.release_discriminant IS NULL
  AND p.release_name IS NOT NULL
  AND extract_discriminant(p.release_name) IS NOT NULL;

-- Catalog descriptions for the recreated functions.
COMMENT ON FUNCTION extract_discriminant
    IS 'Enhanced case-insensitive discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons';
COMMENT ON FUNCTION get_base_name
    IS 'Enhanced case-insensitive base name extraction removing comprehensive discriminant patterns to enable proper grouping';
COMMENT ON FUNCTION extract_edition_discriminant
    IS 'Specialized case-insensitive function for extracting edition and version discriminants with focused patterns';

-- Refresh materialized views to reflect the case-insensitive improvements
REFRESH MATERIALIZED VIEW mv_release_play_counts;
REFRESH MATERIALIZED VIEW mv_recording_play_counts;
-- Recreate the analysis view on top of the case-insensitive functions.
-- For every named recording and release it shows the stored discriminant next
-- to what the extraction functions currently compute.
-- Columns: table_name, original_name, discriminant, base_name,
--          extracted_discriminant, edition_discriminant.
DROP VIEW IF EXISTS discriminant_analysis;
CREATE OR REPLACE VIEW discriminant_analysis AS
SELECT
    src.table_name,
    src.name                               AS original_name,
    src.discriminant,
    get_base_name(src.name)                AS base_name,
    extract_discriminant(src.name)         AS extracted_discriminant,
    extract_edition_discriminant(src.name) AS edition_discriminant
FROM (
    -- recordings first, then releases, tagged with their source table
    SELECT 'recordings' AS table_name, name, discriminant
    FROM recordings
    UNION ALL
    SELECT 'releases' AS table_name, name, discriminant
    FROM releases
) AS src
WHERE src.name IS NOT NULL;

COMMENT ON VIEW discriminant_analysis
    IS 'Analysis view showing case-insensitive discriminant extraction results for quality assessment and debugging';