Reidentification SQL

/*
DROP TABLE file_patients;
*/
CREATE TEMP TABLE file_patients
(file_row_number int4,
file_hashed_id text,
file_data_source varchar(15)
)
;


--2------------------------------------------------------------------------------------------------------
--load patient list data into temp table
    --will need to paste in the formatted text from csv. Basically, using the following:
    --="("& B2 & ", '" & F2 & "', '" & G2 & "'),"

INSERT INTO file_patients
VALUES 
 (Hash values)
,(Hash values)
,(Hash values)
;

--ALTERNATIVELY Skip steps 1 and 2 and import the csv into PUBLIC.file_patients if you have permission,
-- then modify the following queries accordingly.

--3------------------------------------------------------------------------------------------------------
--Modify the temp table to correct dropped 0's from the hashed id 
    --(since the the MDClone application cuts off leading 0's from the exported hashed_id's)


ALTER TABLE file_patients
ADD COLUMN final_file_hashed_id text
;

UPDATE file_patients
   SET final_file_hashed_id = LPAD(file_hashed_id::text, 32, '0')
 WHERE final_file_hashed_id IS NULL 
;


--------------------------------------------------------------------------------------------------------
--Link the hashed id to get the OMOP person id
/*
DROP TABLE pt_joined;
*/
CREATE TEMP TABLE pt_joined AS (
SELECT DISTINCT fp.file_row_number, fp.file_hashed_id, fp.file_data_source, fp.final_file_hashed_id, 
    mdcp.person_id, mdcp.person_id_original, mdcp.partition_source, mdcp.dateshift
FROM file_patients fp
LEFT JOIN omop_dw.mdc_etl.vw_mdclone_patientsv2_lookup mdcp ON fp.final_file_hashed_id = mdcp.patient_hashed 
ORDER BY fp.file_row_number
)
; -- 2m 41s

--------------------------------------------------------------------------------------------------------
--Join person_id's to OMOP to get MRN's
/*
DROP TABLE pt_ids_v1;
*/
CREATE TEMP TABLE pt_ids_v1 AS (
    SELECT DISTINCT file_row_number,
            file_hashed_id,
            file_data_source, 
            final_file_hashed_id, 
            pj.person_id, 
            person_id_original, 
            partition_source, 
            dateshift,
            ppi1.id_value AS Epic_MRN,
            ppi2.id_value AS Soarian_Epic_MRN,
            ppi3.id_value AS Soarian_HDC_MRN,
            ppi4.id_value AS Touchworks_MRN
    FROM pt_joined pj
    LEFT JOIN omop_prod.px_person_identifier ppi1  ON pj.person_id = ppi1.person_id 
                                                  AND ppi1.id_type = 'EPIC MRN'
    LEFT JOIN omop_prod.px_person_identifier ppi2  ON pj.person_id = ppi2.person_id 
                                                  AND ppi2.id_type = 'SOARIAN-MRN'
                                                  AND ppi2.data_source_system = 'EPIC_CLARITY'
    LEFT JOIN omop_prod.px_person_identifier ppi3  ON pj.person_id = ppi3.person_id 
                                                  AND ppi3.id_type = 'SOARIAN MRN - ENTERPRISE'
                                                  AND ppi3.data_source_system = 'HDC'
    LEFT JOIN omop_prod.px_person_identifier ppi4  ON pj.person_id = ppi4.person_id 
                                                  AND ppi4.id_type = 'TW MRN'
)
;


--Pull the data out and then merge into the MDClone csv

    SELECT DISTINCT file_row_number, file_hashed_id, /*file_data_source,*/ 
                final_file_hashed_id, 
                pv1.person_id, /*person_id_original, partition_source, */
                Epic_MRN,
                COALESCE(Soarian_Epic_MRN, Soarian_HDC_MRN) AS Soarian_MRN,
                Touchworks_MRN,
                trim(pxp.full_name) AS patient_name,
                p.birth_datetime AS birth_date,
                dateshift
        FROM pt_ids_v1 pv1
        LEFT JOIN px_person pxp ON pv1.person_id = pxp.person_id 
        LEFT JOIN person p ON pv1.person_id = p.person_id
    ORDER BY file_row_number
;

Table of Contents


Updated on August 7, 2025