MIMIC数据库的官方代码第一部分

-- function
SET search_path TO mimiciv_derived;

CREATE OR REPLACE FUNCTION REGEXP_EXTRACT(str TEXT, pattern TEXT) RETURNS TEXT AS $$
BEGIN
RETURN substring(str from pattern);
END; $$
LANGUAGE PLPGSQL;

CREATE OR REPLACE FUNCTION REGEXP_CONTAINS(str TEXT, pattern TEXT) RETURNS BOOL AS $$
BEGIN
RETURN str ~ pattern;
END; $$
LANGUAGE PLPGSQL;

-- alias generate_series with generate_array
CREATE OR REPLACE FUNCTION GENERATE_ARRAY(i INTEGER, j INTEGER)
RETURNS setof INTEGER language sql as $$
    SELECT GENERATE_SERIES(i, j)
$$;

-- datetime functions
CREATE OR REPLACE FUNCTION DATETIME(dt DATE) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN CAST(dt AS TIMESTAMP(3));
END; $$
LANGUAGE PLPGSQL;

CREATE OR REPLACE FUNCTION DATETIME(year INTEGER, month INTEGER, day INTEGER, hour INTEGER, minute INTEGER, second INTEGER) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN TO_TIMESTAMP(
    TO_CHAR(year, '0000') || TO_CHAR(month, '00') || TO_CHAR(day, '00') || TO_CHAR(hour, '00') || TO_CHAR(minute, '00') || TO_CHAR(second, '00'),
    'yyyymmddHH24MISS'
);
END; $$
LANGUAGE PLPGSQL;

-- overload allowing string input

--  DATETIME_ADD(datetime, INTERVAL 'n' DATEPART) -> datetime + INTERVAL 'n' DATEPART
-- note: in bigquery, `INTERVAL 1 YEAR` is a valid interval
-- but in postgres, it must be `INTERVAL '1' YEAR`
CREATE OR REPLACE FUNCTION DATETIME_ADD(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN datetime_val + intvl;
END; $$
LANGUAGE PLPGSQL;

--  DATETIME_SUB(datetime, INTERVAL 'n' DATEPART) -> datetime - INTERVAL 'n' DATEPART
CREATE OR REPLACE FUNCTION DATETIME_SUB(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN datetime_val - intvl;
END; $$
LANGUAGE PLPGSQL;

-- TODO:
--   DATETIME_TRUNC(datetime, PART) -> DATE_TRUNC('datepart', datetime)

-- below requires a regex to convert datepart from primitive to a string
-- i.e. encapsulate it in single quotes
CREATE OR REPLACE FUNCTION DATETIME_DIFF(endtime TIMESTAMP(3), starttime TIMESTAMP(3), datepart TEXT) RETURNS NUMERIC AS $$
BEGIN
RETURN 
    EXTRACT(EPOCH FROM endtime - starttime) /
    CASE
        WHEN datepart = 'SECOND' THEN 1.0
        WHEN datepart = 'MINUTE' THEN 60.0
        WHEN datepart = 'HOUR' THEN 3600.0
        WHEN datepart = 'DAY' THEN 24*3600.0
        WHEN datepart = 'YEAR' THEN 365.242*24*3600.0
    ELSE NULL END;
END; $$
LANGUAGE PLPGSQL;

-- BigQuery has a custom data type, PART
-- It's difficult to replicate this in postgresql, which recognizes the PART as a column name,
-- unless it is within an EXTRACT() function.

CREATE OR REPLACE FUNCTION BIGQUERY_FORMAT_TO_PSQL(format_str VARCHAR(255)) RETURNS TEXT AS $$
BEGIN
RETURN 
    -- use replace to convert BigQuery string format to postgres string format
    -- only handles a few cases since we don't extensively use this function
    REPLACE(
    REPLACE(
    REPLACE(
    REPLACE(
    REPLACE(
    REPLACE(
        format_str
        , '%S', 'SS'
    )
        , '%M', 'MI'
    )
        , '%H', 'HH24'
    )
        , '%d', 'dd'
    )
        , '%m', 'mm'
    )
        , '%Y', 'yyyy'
    )
;
END; $$
LANGUAGE PLPGSQL;


CREATE OR REPLACE FUNCTION FORMAT_DATE(format_str VARCHAR(255), datetime_val TIMESTAMP(3)) RETURNS TEXT AS $$
BEGIN
RETURN TO_CHAR(
    datetime_val,
    -- use replace to convert BigQuery string format to postgres string format
    -- only handles a few cases since we don't extensively use this function
    BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;


CREATE OR REPLACE FUNCTION PARSE_DATE(format_str VARCHAR(255), string_val VARCHAR(255)) RETURNS DATE AS $$
BEGIN
RETURN TO_DATE(
    string_val,
    -- use replace to convert BigQuery string format to postgres string format
    -- only handles a few cases since we don't extensively use this function
    BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;

CREATE OR REPLACE FUNCTION FORMAT_DATETIME(format_str VARCHAR(255), datetime_val TIMESTAMP(3)) RETURNS TEXT AS $$
BEGIN
RETURN TO_CHAR(
    datetime_val,
    -- use replace to convert BigQuery string format to postgres string format
    -- only handles a few cases since we don't extensively use this function
    BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;


CREATE OR REPLACE FUNCTION PARSE_DATETIME(format_str VARCHAR(255), string_val VARCHAR(255)) RETURNS TIMESTAMP(3) AS $$
BEGIN
RETURN TO_TIMESTAMP(
    string_val,
    -- use replace to convert BigQuery string format to postgres string format
    -- only handles a few cases since we don't extensively use this function
    BIGQUERY_FORMAT_TO_PSQL(format_str)
);
END; $$
LANGUAGE PLPGSQL;

age

-- age
create TABLE  if not exists age as 
SELECT 	
	ad.subject_id
	, ad.hadm_id
	, ad.admittime
	, pa.anchor_age
	, pa.anchor_year
	, DATETIME_DIFF(ad.admittime, DATETIME(pa.anchor_year, 1, 1, 0, 0, 0), 'YEAR') + pa.anchor_age AS age
FROM mimiciv_hosp.admissions ad
INNER JOIN mimiciv_hosp.patients pa
ON ad.subject_id = pa.subject_id;

icustay_detail

create TABLE  if not exists icustay_detail as 
SELECT ie.subject_id, ie.hadm_id, ie.stay_id

-- patient level factors
, pat.gender, pat.dod

-- hospital level factors
, adm.admittime, adm.dischtime
, DATETIME_DIFF(adm.dischtime, adm.admittime, 'DAY') as los_hospital
, DATETIME_DIFF(adm.admittime, DATETIME(pat.anchor_year, 1, 1, 0, 0, 0), 'YEAR') + pat.anchor_age as admission_age
, adm.race
, adm.hospital_expire_flag
, DENSE_RANK() OVER (PARTITION BY adm.subject_id ORDER BY adm.admittime) AS hospstay_seq
, CASE
    WHEN DENSE_RANK() OVER (PARTITION BY adm.subject_id ORDER BY adm.admittime) = 1 THEN True
    ELSE False END AS first_hosp_stay

-- icu level factors
, ie.intime as icu_intime, ie.outtime as icu_outtime
, ROUND(DATETIME_DIFF(ie.outtime, ie.intime, 'HOUR')/24.0, 2) as los_icu
, DENSE_RANK() OVER (PARTITION BY ie.hadm_id ORDER BY ie.intime) AS icustay_seq

-- first ICU stay *for the current hospitalization*
, CASE
    WHEN DENSE_RANK() OVER (PARTITION BY ie.hadm_id ORDER BY ie.intime) = 1 THEN True
    ELSE False END AS first_icu_stay

FROM mimiciv_icu.icustays ie
INNER JOIN mimiciv_hosp.admissions adm
    ON ie.hadm_id = adm.hadm_id
INNER JOIN mimiciv_hosp.patients pat
    ON ie.subject_id = pat.subject_id;

icustay_times

-- 3. icustay_times
create TABLE  if not exists icustay_times as 
WITH t1 AS
(
    select ce.stay_id
    , min(charttime) as intime_hr
    , max(charttime) as outtime_hr
    FROM mimiciv_icu.chartevents ce
    -- only look at heart rate
    where ce.itemid = 220045
    group by ce.stay_id
)
-- add in subject_id/hadm_id
select
  ie.subject_id, ie.hadm_id, ie.stay_id
  , t1.intime_hr
  , t1.outtime_hr
FROM mimiciv_icu.icustays ie
left join t1
  on ie.stay_id = t1.stay_id;

icustay_hourly

-- 4. icustay_hourly

-- 它的主要目的是为每位患者计算一个时间范围(从进入 ICU 前 24 小时到离开 ICU),并生成一个包含这些小时数的数组。
create TABLE  if not exists icustay_hourly as 
with all_hours as
(
select
  it.stay_id

  -- ceiling the intime to the nearest hour by adding 59 minutes then truncating
  -- note thart we truncate by parsing as string, rather than using DATETIME_TRUNC
  -- this is done to enable compatibility with psql
  , PARSE_DATETIME(
      '%Y-%m-%d %H:00:00',
      FORMAT_DATETIME(
        '%Y-%m-%d %H:00:00',
          DATETIME_ADD(it.intime_hr, INTERVAL '59' MINUTE)
  )) AS endtime

  -- create integers for each charttime in hours from admission
  -- so 0 is admission time, 1 is one hour after admission, etc, up to ICU disch
  --  we allow 24 hours before ICU admission (to grab labs before admit)
  , GENERATE_ARRAY(-24, CEIL(DATETIME_DIFF(it.outtime_hr, it.intime_hr, 'HOUR'))::INTEGER) as hrs
--hrs 是一个数组,包含从进入 ICU 前 24 小时到离开 ICU 的每个小时数。
  from icustay_times it
)
SELECT stay_id
, CAST(hr AS BIGINT) as hr
, DATETIME_ADD(endtime, interval '1 HOUR' * (CAST(hr AS bigint))) as endtime
FROM all_hours
CROSS JOIN UNNEST(array[all_hours.hrs]) AS hr;

weight_durations

-- 5. weight_durations
create TABLE  if not exists weight_durations as 
WITH wt_stg as
(
    SELECT
        c.stay_id
      , c.charttime
      , case when c.itemid = 226512 then 'admit'
          else 'daily' end as weight_type
      -- TODO: eliminate obvious outliers if there is a reasonable weight
      , c.valuenum as weight
    FROM mimiciv_icu.chartevents c
    WHERE c.valuenum IS NOT NULL
      AND c.itemid in
      (
          226512 -- Admit Wt
          , 224639 -- Daily Weight
      )
      AND c.valuenum > 0
)
-- assign ascending row number
, wt_stg1 as
(
  select
      stay_id
    , charttime
    , weight_type
    , weight
    , ROW_NUMBER() OVER (partition by stay_id, weight_type order by charttime) as rn
  from wt_stg
  WHERE weight IS NOT NULL
)
-- change charttime to intime for the first admission weight recorded
, wt_stg2 AS
(
  SELECT 
      wt_stg1.stay_id
    , ie.intime, ie.outtime
    , wt_stg1.weight_type
    , case when wt_stg1.weight_type = 'admit' and wt_stg1.rn = 1
        then DATETIME_SUB(ie.intime, INTERVAL '2' HOUR)
      else wt_stg1.charttime end as starttime
    , wt_stg1.weight
  from wt_stg1
  INNER JOIN mimiciv_icu.icustays ie
    on ie.stay_id = wt_stg1.stay_id
)
, wt_stg3 as
(
  select
    stay_id
    , intime, outtime
    , starttime
    , coalesce(
        LEAD(starttime) OVER (PARTITION BY stay_id ORDER BY starttime),
        DATETIME_ADD(outtime, INTERVAL '2' HOUR)
      ) as endtime
    , weight
    , weight_type
  from wt_stg2
)
-- this table is the start/stop times from admit/daily weight in charted data
, wt1 as
(
  select
      stay_id
    , starttime
    , coalesce(endtime,
      LEAD(starttime) OVER (partition by stay_id order by starttime),
      -- impute ICU discharge as the end of the final weight measurement
      -- plus a 2 hour "fuzziness" window
      DATETIME_ADD(outtime, INTERVAL '2' HOUR)
    ) as endtime
    , weight
    , weight_type
  from wt_stg3
)
-- if the intime for the patient is < the first charted daily weight
-- then we will have a "gap" at the start of their stay
-- to prevent this, we look for these gaps and backfill the first weight
-- this adds (153255-149657)=3598 rows, meaning this fix helps for up to 3598 stay_id
, wt_fix as
(
  select ie.stay_id
    -- we add a 2 hour "fuzziness" window
    , DATETIME_SUB(ie.intime, INTERVAL '2' HOUR) as starttime
    , wt.starttime as endtime
    , wt.weight
    , wt.weight_type
  from mimiciv_icu.icustays ie
  inner join
  -- the below subquery returns one row for each unique stay_id
  -- the row contains: the first starttime and the corresponding weight
  (
    SELECT wt1.stay_id, wt1.starttime, wt1.weight
    , weight_type
    , ROW_NUMBER() OVER (PARTITION BY wt1.stay_id ORDER BY wt1.starttime) as rn
    FROM wt1
  ) wt
    ON  ie.stay_id = wt.stay_id
    AND wt.rn = 1
    and ie.intime < wt.starttime
)
-- add the backfill rows to the main weight table
SELECT
wt1.stay_id
, wt1.starttime
, wt1.endtime
, wt1.weight
, wt1.weight_type
FROM wt1
UNION ALL
SELECT
wt_fix.stay_id
, wt_fix.starttime
, wt_fix.endtime
, wt_fix.weight
, wt_fix.weight_type
FROM wt_fix;

bg

-- 6. bg
-- The aim of this query is to pivot entries related to blood gases
-- which were found in LABEVENTS
create TABLE  if not exists bg as 
WITH bg AS
(
select 
  -- specimen_id only ever has 1 measurement for each itemid
  -- so, we may simply collapse rows using MAX()
    MAX(subject_id) AS subject_id
  , MAX(hadm_id) AS hadm_id
  , MAX(charttime) AS charttime
  -- specimen_id *may* have different storetimes, so this is taking the latest
  , MAX(storetime) AS storetime
  , le.specimen_id
  , MAX(CASE WHEN itemid = 52028 THEN value ELSE NULL END) AS specimen
  , MAX(CASE WHEN itemid = 50801 THEN valuenum ELSE NULL END) AS aado2
  , MAX(CASE WHEN itemid = 50802 THEN valuenum ELSE NULL END) AS baseexcess
  , MAX(CASE WHEN itemid = 50803 THEN valuenum ELSE NULL END) AS bicarbonate
  , MAX(CASE WHEN itemid = 50804 THEN valuenum ELSE NULL END) AS totalco2
  , MAX(CASE WHEN itemid = 50805 THEN valuenum ELSE NULL END) AS carboxyhemoglobin
  , MAX(CASE WHEN itemid = 50806 THEN valuenum ELSE NULL END) AS chloride
  , MAX(CASE WHEN itemid = 50808 THEN valuenum ELSE NULL END) AS calcium
  , MAX(CASE WHEN itemid = 50809 and valuenum <= 10000 THEN valuenum ELSE NULL END) AS glucose
  , MAX(CASE WHEN itemid = 50810 and valuenum <= 100 THEN valuenum ELSE NULL END) AS hematocrit
  , MAX(CASE WHEN itemid = 50811 THEN valuenum ELSE NULL END) AS hemoglobin
  , MAX(CASE WHEN itemid = 50813 and valuenum <= 10000 THEN valuenum ELSE NULL END) AS lactate
  , MAX(CASE WHEN itemid = 50814 THEN valuenum ELSE NULL END) AS methemoglobin
  , MAX(CASE WHEN itemid = 50815 THEN valuenum ELSE NULL END) AS o2flow
  -- fix a common unit conversion error for fio2
  -- atmospheric o2 is 20.89%, so any value <= 20 is unphysiologic
  -- usually this is a misplaced O2 flow measurement
  , MAX(CASE WHEN itemid = 50816 THEN
      CASE
        WHEN valuenum > 20 AND valuenum <= 100 THEN valuenum 
        WHEN valuenum > 0.2 AND valuenum <= 1.0 THEN valuenum*100.0
      ELSE NULL END
    ELSE NULL END) AS fio2
  , MAX(CASE WHEN itemid = 50817 AND valuenum <= 100 THEN valuenum ELSE NULL END) AS so2
  , MAX(CASE WHEN itemid = 50818 THEN valuenum ELSE NULL END) AS pco2
  , MAX(CASE WHEN itemid = 50819 THEN valuenum ELSE NULL END) AS peep
  , MAX(CASE WHEN itemid = 50820 THEN valuenum ELSE NULL END) AS ph
  , MAX(CASE WHEN itemid = 50821 THEN valuenum ELSE NULL END) AS po2
  , MAX(CASE WHEN itemid = 50822 THEN valuenum ELSE NULL END) AS potassium
  , MAX(CASE WHEN itemid = 50823 THEN valuenum ELSE NULL END) AS requiredo2
  , MAX(CASE WHEN itemid = 50824 THEN valuenum ELSE NULL END) AS sodium
  , MAX(CASE WHEN itemid = 50825 THEN valuenum ELSE NULL END) AS temperature
  , MAX(CASE WHEN itemid = 50807 THEN value ELSE NULL END) AS comments
FROM mimiciv_hosp.labevents le
where le.ITEMID in
-- blood gases
(
    52028 -- specimen
  , 50801 -- aado2
  , 50802 -- base excess
  , 50803 -- bicarb
  , 50804 -- calc tot co2
  , 50805 -- carboxyhgb
  , 50806 -- chloride
  -- , 52390 -- chloride, WB CL-
  , 50807 -- comments
  , 50808 -- free calcium
  , 50809 -- glucose
  , 50810 -- hct
  , 50811 -- hgb
  , 50813 -- lactate
  , 50814 -- methemoglobin
  , 50815 -- o2 flow
  , 50816 -- fio2
  , 50817 -- o2 sat
  , 50818 -- pco2
  , 50819 -- peep
  , 50820 -- pH
  , 50821 -- pO2
  , 50822 -- potassium
  -- , 52408 -- potassium, WB K+
  , 50823 -- required O2
  , 50824 -- sodium
  -- , 52411 -- sodium, WB NA +
  , 50825 -- temperature
)
GROUP BY le.specimen_id
)
, stg_spo2 as
(
  select subject_id, charttime
    -- avg here is just used to group SpO2 by charttime
    , AVG(valuenum) as SpO2
  FROM mimiciv_icu.chartevents
  where ITEMID = 220277 -- O2 saturation pulseoxymetry
  and valuenum > 0 and valuenum <= 100
  group by subject_id, charttime
)
, stg_fio2 as
(
  select subject_id, charttime
    -- pre-process the FiO2s to ensure they are between 21-100%
    , max(
        case
          when valuenum > 0.2 and valuenum <= 1
            then valuenum * 100
          -- improperly input data - looks like O2 flow in litres
          when valuenum > 1 and valuenum < 20
            then null
          when valuenum >= 20 and valuenum <= 100
            then valuenum
      else null end
    ) as fio2_chartevents
  FROM mimiciv_icu.chartevents
  where ITEMID = 223835 -- Inspired O2 Fraction (FiO2)
  and valuenum > 0 and valuenum <= 100
  group by subject_id, charttime
)
, stg2 as
(
select bg.*
  , ROW_NUMBER() OVER (partition by bg.subject_id, bg.charttime order by s1.charttime DESC) as lastRowSpO2
  , s1.spo2
from bg
left join stg_spo2 s1
  -- same hospitalization
  on  bg.subject_id = s1.subject_id
  -- spo2 occurred at most 2 hours before this blood gas
  and s1.charttime between DATETIME_SUB(bg.charttime, INTERVAL '2'HOUR) and bg.charttime
where bg.po2 is not null
)
, stg3 as
(
select bg.*
  , ROW_NUMBER() OVER (partition by bg.subject_id, bg.charttime order by s2.charttime DESC) as lastRowFiO2
  , s2.fio2_chartevents
  -- create our specimen prediction
  ,  1/(1+exp(-(-0.02544
  +    0.04598 * po2
  + coalesce(-0.15356 * spo2             , -0.15356 *   97.49420 +    0.13429)
  + coalesce( 0.00621 * fio2_chartevents ,  0.00621 *   51.49550 +   -0.24958)
  + coalesce( 0.10559 * hemoglobin       ,  0.10559 *   10.32307 +    0.05954)
  + coalesce( 0.13251 * so2              ,  0.13251 *   93.66539 +   -0.23172)
  + coalesce(-0.01511 * pco2             , -0.01511 *   42.08866 +   -0.01630)
  + coalesce( 0.01480 * fio2             ,  0.01480 *   63.97836 +   -0.31142)
  + coalesce(-0.00200 * aado2            , -0.00200 *  442.21186 +   -0.01328)
  + coalesce(-0.03220 * bicarbonate      , -0.03220 *   22.96894 +   -0.06535)
  + coalesce( 0.05384 * totalco2         ,  0.05384 *   24.72632 +   -0.01405)
  + coalesce( 0.08202 * lactate          ,  0.08202 *    3.06436 +    0.06038)
  + coalesce( 0.10956 * ph               ,  0.10956 *    7.36233 +   -0.00617)
  + coalesce( 0.00848 * o2flow           ,  0.00848 *    7.59362 +   -0.35803)
  ))) as specimen_prob
from stg2 bg
left join stg_fio2 s2
  -- same patient
  on  bg.subject_id = s2.subject_id
  -- fio2 occurred at most 4 hours before this blood gas
  and s2.charttime between DATETIME_SUB(bg.charttime, INTERVAL '4' HOUR) and bg.charttime
  AND s2.fio2_chartevents > 0
where bg.lastRowSpO2 = 1 -- only the row with the most recent SpO2 (if no SpO2 found lastRowSpO2 = 1)
)
select
    stg3.subject_id
  , stg3.hadm_id
  , stg3.charttime
  -- raw data indicating sample type
  , specimen 
  -- prediction of specimen for obs missing the actual specimen
  , case
        when specimen is not null then specimen
        when specimen_prob > 0.75 then 'ART.'
      else null end as specimen_pred
  , specimen_prob

  -- oxygen related parameters
  , so2
  , po2
  , pco2
  , fio2_chartevents, fio2
  , aado2
  -- also calculate AADO2
  , case
      when  po2 is null
        OR pco2 is null
      THEN NULL
      WHEN fio2 IS NOT NULL
        -- multiple by 100 because fio2 is in a % but should be a fraction
        THEN (fio2/100) * (760 - 47) - (pco2/0.8) - po2
      WHEN fio2_chartevents IS NOT NULL
        THEN (fio2_chartevents/100) * (760 - 47) - (pco2/0.8) - po2
      else null
    end as aado2_calc
  , case
      when PO2 is null
        THEN NULL
      WHEN fio2 IS NOT NULL
       -- multiply by 100 because fio2 is in a % but should be a fraction
        then 100 * PO2/fio2
      WHEN fio2_chartevents IS NOT NULL
       -- multiply by 100 because fio2 is in a % but should be a fraction
        then 100 * PO2/fio2_chartevents
      else null
    end as pao2fio2ratio
  -- acid-base parameters
  , ph, baseexcess
  , bicarbonate, totalco2

  -- blood count parameters
  , hematocrit
  , hemoglobin
  , carboxyhemoglobin
  , methemoglobin

  -- chemistry
  , chloride, calcium
  , temperature
  , potassium, sodium
  , lactate
  , glucose

  -- ventilation stuff that's sometimes input
  -- , intubated, tidalvolume, ventilationrate, ventilator
  -- , peep, o2flow
  -- , requiredo2
from stg3
where lastRowFiO2 = 1 -- only the most recent FiO2
;

blood_differential

-- 7. blood_differential
-- For reference, some common unit conversions:
-- 10^9/L == K/uL == 10^3/uL
create TABLE  if not exists blood_differential as 
WITH blood_diff AS
(
SELECT
    MAX(subject_id) AS subject_id
  , MAX(hadm_id) AS hadm_id
  , MAX(charttime) AS charttime
  , le.specimen_id
  -- create one set of columns for percentages, and one set of columns for counts
  -- we harmonize all count units into K/uL == 10^9/L
  -- counts have an "_abs" suffix, percentages do not

  -- absolute counts
  , MAX(CASE WHEN itemid in (51300, 51301, 51755) THEN valuenum ELSE NULL END) AS wbc
  , MAX(CASE WHEN itemid = 52069 THEN valuenum ELSE NULL END) AS basophils_abs
  -- 52073 in K/uL, 51199 in #/uL
  , MAX(CASE WHEN itemid = 52073 THEN valuenum WHEN itemid = 51199 THEN valuenum / 1000.0 ELSE NULL END) AS eosinophils_abs
  -- 51133 in K/uL, 52769 in #/uL
  , MAX(CASE WHEN itemid = 51133 THEN valuenum WHEN itemid = 52769 THEN valuenum / 1000.0 ELSE NULL END) AS lymphocytes_abs
  -- 52074 in K/uL, 51253 in #/uL
  , MAX(CASE WHEN itemid = 52074 THEN valuenum WHEN itemid = 51253 THEN valuenum / 1000.0 ELSE NULL END) AS monocytes_abs
  , MAX(CASE WHEN itemid = 52075 THEN valuenum ELSE NULL END) AS neutrophils_abs
  -- convert from #/uL to K/uL
  , MAX(CASE WHEN itemid = 51218 THEN valuenum / 1000.0 ELSE NULL END) AS granulocytes_abs

  -- percentages, equal to cell count / white blood cell count
  , MAX(CASE WHEN itemid = 51146 THEN valuenum ELSE NULL END) AS basophils
  , MAX(CASE WHEN itemid = 51200 THEN valuenum ELSE NULL END) AS eosinophils
  , MAX(CASE WHEN itemid in (51244, 51245) THEN valuenum ELSE NULL END) AS lymphocytes
  , MAX(CASE WHEN itemid = 51254 THEN valuenum ELSE NULL END) AS monocytes
  , MAX(CASE WHEN itemid = 51256 THEN valuenum ELSE NULL END) AS neutrophils

  -- other cell count percentages
  , MAX(CASE WHEN itemid = 51143 THEN valuenum ELSE NULL END) AS atypical_lymphocytes
  , MAX(CASE WHEN itemid = 51144 THEN valuenum ELSE NULL END) AS bands
  , MAX(CASE WHEN itemid = 52135 THEN valuenum ELSE NULL END) AS immature_granulocytes
  , MAX(CASE WHEN itemid = 51251 THEN valuenum ELSE NULL END) AS metamyelocytes
  , MAX(CASE WHEN itemid = 51257 THEN valuenum ELSE NULL END) AS nrbc

  -- utility flags which determine whether imputation is possible
  , CASE
    -- WBC is available
    WHEN MAX(CASE WHEN itemid in (51300, 51301, 51755) THEN valuenum ELSE NULL END) > 0
    -- and we have at least one percentage from the diff
    -- sometimes the entire diff is 0%, which looks like bad data
    AND SUM(CASE WHEN itemid IN (51146, 51200, 51244, 51245, 51254, 51256) THEN valuenum ELSE NULL END) > 0
    THEN 1 ELSE 0 END AS impute_abs

FROM mimiciv_hosp.labevents le
WHERE le.itemid IN
(
    51146, -- basophils
    52069, -- Absolute basophil count
    51199, -- Eosinophil Count
    51200, -- Eosinophils
    52073, -- Absolute Eosinophil count
    51244, -- Lymphocytes
    51245, -- Lymphocytes, Percent
    51133, -- Absolute Lymphocyte Count
    52769, -- Absolute Lymphocyte Count
    51253, -- Monocyte Count
    51254, -- Monocytes
    52074, -- Absolute Monocyte Count
    51256, -- Neutrophils
    52075, -- Absolute Neutrophil Count
    51143, -- Atypical lymphocytes
    51144, -- Bands (%)
    51218, -- Granulocyte Count
    52135, -- Immature granulocytes (%)
    51251, -- Metamyelocytes
    51257,  -- Nucleated Red Cells

    -- wbc totals measured in K/uL
    51300, 51301, 51755
    -- 52220 (wbcp) is percentage

    -- below are point of care tests which are extremely infrequent and usually low quality
    -- 51697, -- Neutrophils (mmol/L)

    -- below itemid do not have data as of MIMIC-IV v1.0
    -- 51536, -- Absolute Lymphocyte Count
    -- 51537, -- Absolute Neutrophil
    -- 51690, -- Lymphocytes
    -- 52151, -- NRBC
)
AND valuenum IS NOT NULL
-- differential values cannot be negative
AND valuenum >= 0
GROUP BY le.specimen_id
)
SELECT 
subject_id, hadm_id, charttime, specimen_id

, wbc
-- impute absolute count if percentage & WBC is available
, ROUND((CASE
    WHEN basophils_abs IS NULL AND basophils IS NOT NULL AND impute_abs = 1
        THEN basophils * wbc
    ELSE basophils_abs
END)::numeric, 4) AS basophils_abs
, ROUND(CASE
    WHEN eosinophils_abs IS NULL AND eosinophils IS NOT NULL AND impute_abs = 1
        THEN eosinophils * wbc
    ELSE eosinophils_abs
END::numeric, 4) AS eosinophils_abs
, ROUND(CASE
    WHEN lymphocytes_abs IS NULL AND lymphocytes IS NOT NULL AND impute_abs = 1
        THEN lymphocytes * wbc
    ELSE lymphocytes_abs
END::numeric, 4) AS lymphocytes_abs
, ROUND((CASE
    WHEN monocytes_abs IS NULL AND monocytes IS NOT NULL AND impute_abs = 1
        THEN monocytes * wbc
    ELSE monocytes_abs
END)::numeric, 4) AS monocytes_abs
, ROUND((CASE
    WHEN neutrophils_abs IS NULL AND neutrophils IS NOT NULL AND impute_abs = 1
        THEN neutrophils * wbc
    ELSE neutrophils_abs
END)::numeric, 4) AS neutrophils_abs

, basophils
, eosinophils
, lymphocytes
, monocytes
, neutrophils

-- impute bands/blasts?
, atypical_lymphocytes
, bands
, immature_granulocytes
, metamyelocytes
, nrbc
FROM blood_diff
;

cardiac_marker

-- 8. cardiac_marker
-- begin query that extracts the data
create TABLE  if not exists cardiac_marker as 
SELECT
    MAX(subject_id) AS subject_id
  , MAX(hadm_id) AS hadm_id
  , MAX(charttime) AS charttime
  , le.specimen_id
  -- convert from itemid into a meaningful column
  , MAX(CASE WHEN itemid = 51002 THEN value ELSE NULL END) AS troponin_i
  , MAX(CASE WHEN itemid = 51003 THEN value ELSE NULL END) AS troponin_t
  , MAX(CASE WHEN itemid = 50911 THEN valuenum ELSE NULL END) AS ck_mb
FROM mimiciv_hosp.labevents le
WHERE le.itemid IN
(
    -- 51002, -- Troponin I (troponin-I is not measured in MIMIC-IV)
    -- 52598, -- Troponin I, point of care, rare/poor quality
    51003, -- Troponin T
    50911  -- Creatinine Kinase, MB isoenzyme
)
GROUP BY le.specimen_id
;

chemistry

-- 9.chemistry
-- extract chemistry labs
-- excludes point of care tests (very rare)
-- blood gas measurements are *not* included in this query
-- instead they are in bg.sql
create TABLE  if not exists chemistry as 
SELECT 
    MAX(subject_id) AS subject_id
  , MAX(hadm_id) AS hadm_id
  , MAX(charttime) AS charttime
  , le.specimen_id
  -- convert from itemid into a meaningful column
  , MAX(CASE WHEN itemid = 50862 AND valuenum <=    10 THEN valuenum ELSE NULL END) AS albumin
  , MAX(CASE WHEN itemid = 50930 AND valuenum <=    10 THEN valuenum ELSE NULL END) AS globulin
  , MAX(CASE WHEN itemid = 50976 AND valuenum <=    20 THEN valuenum ELSE NULL END) AS total_protein
  , MAX(CASE WHEN itemid = 50868 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS aniongap
  , MAX(CASE WHEN itemid = 50882 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS bicarbonate
  , MAX(CASE WHEN itemid = 51006 AND valuenum <=   300 THEN valuenum ELSE NULL END) AS bun
  , MAX(CASE WHEN itemid = 50893 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS calcium
  , MAX(CASE WHEN itemid = 50902 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS chloride
  , MAX(CASE WHEN itemid = 50912 AND valuenum <=   150 THEN valuenum ELSE NULL END) AS creatinine
  , MAX(CASE WHEN itemid = 50931 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS glucose
  , MAX(CASE WHEN itemid = 50983 AND valuenum <=   200 THEN valuenum ELSE NULL END) AS sodium
  , MAX(CASE WHEN itemid = 50971 AND valuenum <=    30 THEN valuenum ELSE NULL END) AS potassium
FROM mimiciv_hosp.labevents le
WHERE le.itemid IN
(
  -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS
  50862, -- ALBUMIN | CHEMISTRY | BLOOD | 146697
  50930, -- Globulin
  50976, -- Total protein
  50868, -- ANION GAP | CHEMISTRY | BLOOD | 769895
  -- 52456, -- Anion gap, point of care test
  50882, -- BICARBONATE | CHEMISTRY | BLOOD | 780733
  50893, -- Calcium
  50912, -- CREATININE | CHEMISTRY | BLOOD | 797476
  -- 52502, Creatinine, point of care
  50902, -- CHLORIDE | CHEMISTRY | BLOOD | 795568
  50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981
  -- 52525, Glucose, point of care
  50971, -- POTASSIUM | CHEMISTRY | BLOOD | 845825
  -- 52566, -- Potassium, point of care
  50983, -- SODIUM | CHEMISTRY | BLOOD | 808489
  -- 52579, -- Sodium, point of care
  51006  -- UREA NITROGEN | CHEMISTRY | BLOOD | 791925
  -- 52603, Urea, point of care
)
AND valuenum IS NOT NULL
-- lab values cannot be 0 and cannot be negative
-- .. except anion gap.
AND (valuenum > 0 OR itemid = 50868)
GROUP BY le.specimen_id
;

coagulation

-- 10.coagulation
create TABLE  if not exists coagulation as 
SELECT
    MAX(subject_id) AS subject_id
  , MAX(hadm_id) AS hadm_id
  , MAX(charttime) AS charttime
  , le.specimen_id
  -- convert from itemid into a meaningful column
  , MAX(CASE WHEN itemid = 51196 THEN valuenum ELSE NULL END) AS d_dimer
  , MAX(CASE WHEN itemid = 51214 THEN valuenum ELSE NULL END) AS fibrinogen
  , MAX(CASE WHEN itemid = 51297 THEN valuenum ELSE NULL END) AS thrombin
  , MAX(CASE WHEN itemid = 51237 THEN valuenum ELSE NULL END) AS inr
  , MAX(CASE WHEN itemid = 51274 THEN valuenum ELSE NULL END) AS pt
  , MAX(CASE WHEN itemid = 51275 THEN valuenum ELSE NULL END) AS ptt
FROM mimiciv_hosp.labevents le
WHERE le.itemid IN
(
    -- 51149, 52750, 52072, 52073 -- Bleeding Time, no data as of MIMIC-IV v0.4
    51196, -- D-Dimer
    51214, -- Fibrinogen
    -- 51280, 52893, -- Reptilase Time, no data as of MIMIC-IV v0.4
    -- 51281, 52161, -- Reptilase Time Control, no data as of MIMIC-IV v0.4
    51297, -- thrombin
    51237, -- INR
    51274, -- PT
    51275 -- PTT
)
AND valuenum IS NOT NULL
GROUP BY le.specimen_id
;

complete_blood_count

-- 11. complete_blood_count
-- begin query that extracts the data
create TABLE  if not exists complete_blood_count as 
SELECT
    MAX(subject_id) AS subject_id
  , MAX(hadm_id) AS hadm_id
  , MAX(charttime) AS charttime
  , le.specimen_id
  -- convert from itemid into a meaningful column
  , MAX(CASE WHEN itemid = 51221 THEN valuenum ELSE NULL END) AS hematocrit
  , MAX(CASE WHEN itemid = 51222 THEN valuenum ELSE NULL END) AS hemoglobin
  , MAX(CASE WHEN itemid = 51248 THEN valuenum ELSE NULL END) AS mch
  , MAX(CASE WHEN itemid = 51249 THEN valuenum ELSE NULL END) AS mchc
  , MAX(CASE WHEN itemid = 51250 THEN valuenum ELSE NULL END) AS mcv
  , MAX(CASE WHEN itemid = 51265 THEN valuenum ELSE NULL END) AS platelet
  , MAX(CASE WHEN itemid = 51279 THEN valuenum ELSE NULL END) AS rbc
  , MAX(CASE WHEN itemid = 51277 THEN valuenum ELSE NULL END) AS rdw
  , MAX(CASE WHEN itemid = 52159 THEN valuenum ELSE NULL END) AS rdwsd
  , MAX(CASE WHEN itemid = 51301 THEN valuenum ELSE NULL END) AS wbc
FROM mimiciv_hosp.labevents le
WHERE le.itemid IN
(
    51221, -- hematocrit
    51222, -- hemoglobin
    51248, -- MCH
    51249, -- MCHC
    51250, -- MCV
    51265, -- platelets
    51279, -- RBC
    51277, -- RDW
    52159, -- RDW SD
    51301  -- WBC

)
AND valuenum IS NOT NULL
-- lab values cannot be 0 and cannot be negative
AND valuenum > 0
GROUP BY le.specimen_id
;

creatinine_baseline

-- 12.creatinine_baseline
-- This query extracts the serum creatinine baselines of adult patients on each hospital admission.
-- The baseline is determined by the following rules:
--     i. if the lowest creatinine value during this admission is normal (<1.1), then use the value
--     ii. if the patient is diagnosed with chronic kidney disease (CKD), then use the lowest creatinine value during the admission, although it may be rather large.
--     iii. Otherwise, we estimate the baseline using the Simplified MDRD Formula:
--          eGFR = 186 × Scr^(-1.154) × Age^(-0.203) × 0.742Female
--     Let eGFR = 75. Scr = [ 75 / 186 / Age^(-0.203) / (0.742Female) ] ^ (1/-1.154)



create TABLE  if not exists creatinine_baseline as 
WITH p as
(
    SELECT 
        ag.subject_id
        , ag.hadm_id
        , ag.age
        , p.gender
        , CASE WHEN p.gender='F' THEN 
            POWER(75.0 / 186.0 / POWER(ag.age, -0.203) / 0.742, -1/1.154)
            ELSE 
            POWER(75.0 / 186.0 / POWER(ag.age, -0.203), -1/1.154)
            END 
            AS MDRD_est --估算的肾小球滤过率
    FROM age ag
    LEFT JOIN mimiciv_hosp.patients p
    ON ag.subject_id = p.subject_id
    WHERE ag.age >= 18
)
, lab as
(
    SELECT 
        hadm_id
        , MIN(creatinine) AS scr_min
    FROM chemistry
    GROUP BY hadm_id
)
, ckd as 
(
    SELECT hadm_id, MAX(1) AS CKD_flag
    FROM mimiciv_hosp.diagnoses_icd
    WHERE 
        (
            SUBSTR(icd_code, 1, 3) = '585'
            --筛选出 icd_code 字段以 '585' 开头并使用的是 ICD-9 版本的记录
            AND 
            icd_version = 9
        )
    OR 
        (
            SUBSTR(icd_code, 1, 3) = 'N18'
            AND 
            icd_version = 10
        )
    GROUP BY 1
)
SELECT 
    p.hadm_id
    , p.gender
    , p.age
    , lab.scr_min
    , COALESCE(ckd.ckd_flag, 0) AS ckd
    , p.MDRD_est
    , CASE 
    WHEN lab.scr_min<=1.1 THEN scr_min
    WHEN ckd.ckd_flag=1 THEN scr_min
    ELSE MDRD_est END AS scr_baseline
FROM p
LEFT JOIN lab
ON p.hadm_id = lab.hadm_id
LEFT JOIN ckd
ON p.hadm_id = ckd.hadm_id
;

enzyme

-- 13.enzyme
-- begin query that extracts the data
create TABLE  if not exists enzyme as 
SELECT
    MAX(subject_id) AS subject_id
  , MAX(hadm_id) AS hadm_id
  , MAX(charttime) AS charttime
  , le.specimen_id
  -- convert from itemid into a meaningful column
  , MAX(CASE WHEN itemid = 50861 THEN valuenum ELSE NULL END) AS alt
  , MAX(CASE WHEN itemid = 50863 THEN valuenum ELSE NULL END) AS alp
  , MAX(CASE WHEN itemid = 50878 THEN valuenum ELSE NULL END) AS ast
  , MAX(CASE WHEN itemid = 50867 THEN valuenum ELSE NULL END) AS amylase
  , MAX(CASE WHEN itemid = 50885 THEN valuenum ELSE NULL END) AS bilirubin_total
  , MAX(CASE WHEN itemid = 50883 THEN valuenum ELSE NULL END) AS bilirubin_direct
  , MAX(CASE WHEN itemid = 50884 THEN valuenum ELSE NULL END) AS bilirubin_indirect
  , MAX(CASE WHEN itemid = 50910 THEN valuenum ELSE NULL END) AS ck_cpk
  , MAX(CASE WHEN itemid = 50911 THEN valuenum ELSE NULL END) AS ck_mb
  , MAX(CASE WHEN itemid = 50927 THEN valuenum ELSE NULL END) AS ggt
  , MAX(CASE WHEN itemid = 50954 THEN valuenum ELSE NULL END) AS ld_ldh
FROM mimiciv_hosp.labevents le
WHERE le.itemid IN
(
    50861, -- Alanine transaminase (ALT)
    50863, -- Alkaline phosphatase (ALP)
    50878, -- Aspartate transaminase (AST)
    50867, -- Amylase
    50885, -- total bili
    50884, -- indirect bili
    50883, -- direct bili
    50910, -- ck_cpk
    50911, -- CK-MB
    50927, -- Gamma Glutamyltransferase (GGT)
    50954 -- ld_ldh
)
AND valuenum IS NOT NULL
-- lab values cannot be 0 and cannot be negative
AND valuenum > 0
GROUP BY le.specimen_id
;

gcs

-- 14.gcs
-- This query extracts the Glasgow Coma Scale, a measure of neurological function.
-- The query has a few special rules:
--    (1) The verbal component can be set to 0 if the patient is ventilated.
--    This is corrected to 5 - the overall GCS is set to 15 in these cases.
--    (2) Often only one of three components is documented. The other components
--    are carried forward.

-- ITEMIDs used:

-- METAVISION
--    223900 GCS - Verbal Response
--    223901 GCS - Motor Response
--    220739 GCS - Eye Opening

-- Note:
--  The GCS for sedated patients is defaulted to 15 in this code.
--  This is in line with how the data is meant to be collected.
--  e.g., from the SAPS II publication:
--    For sedated patients, the Glasgow Coma Score before sedation was used.
--    This was ascertained either from interviewing the physician who ordered the sedation,
--    or by reviewing the patient's medical record.
create TABLE  if not exists gcs as 
with base as
(
  select 
    subject_id
  , ce.stay_id, ce.charttime
  -- pivot each value into its own column
  , max(case when ce.ITEMID = 223901 then ce.valuenum else null end) as GCSMotor
  , max(case
      when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 0
      when ce.ITEMID = 223900 then ce.valuenum
      else null 
    end) as GCSVerbal
  , max(case when ce.ITEMID = 220739 then ce.valuenum else null end) as GCSEyes
  -- convert the data into a number, reserving a value of 0 for ET/Trach
  , max(case
      -- endotrach/vent is assigned a value of 0
      -- flag it here to later parse specially
      when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 1 -- metavision
    else 0 end)
    as endotrachflag
  , ROW_NUMBER ()
          OVER (PARTITION BY ce.stay_id ORDER BY ce.charttime ASC) as rn
  from mimiciv_icu.chartevents ce
  -- Isolate the desired GCS variables
  where ce.ITEMID in
  (
    -- GCS components, Metavision
    223900, 223901, 220739
  )
  group by ce.subject_id, ce.stay_id, ce.charttime
)
, gcs as (
  select b.*
  , b2.GCSVerbal as GCSVerbalPrev
  , b2.GCSMotor as GCSMotorPrev
  , b2.GCSEyes as GCSEyesPrev
  -- Calculate GCS, factoring in special case when they are intubated and prev vals
  -- note that the coalesce are used to implement the following if:
  --  if current value exists, use it
  --  if previous value exists, use it
  --  otherwise, default to normal
  , case
      -- replace GCS during sedation with 15
      when b.GCSVerbal = 0
        then 15
      when b.GCSVerbal is null and b2.GCSVerbal = 0
        then 15
      -- if previously they were intub, but they aren't now, do not use previous GCS values
      when b2.GCSVerbal = 0
        then
            coalesce(b.GCSMotor,6)
          + coalesce(b.GCSVerbal,5)
          + coalesce(b.GCSEyes,4)
      -- otherwise, add up score normally, imputing previous value if none available at current time
      else
            coalesce(b.GCSMotor,coalesce(b2.GCSMotor,6))
          + coalesce(b.GCSVerbal,coalesce(b2.GCSVerbal,5))
          + coalesce(b.GCSEyes,coalesce(b2.GCSEyes,4))
      end as GCS

  from base b
  -- join to itself within 6 hours to get previous value
  left join base b2
    on b.stay_id = b2.stay_id
    and b.rn = b2.rn+1
    and b2.charttime > DATETIME_ADD(b.charttime, INTERVAL '6' HOUR)
--检查 b2.charttime 是否晚于 b.charttime 之后的6小时。
)
-- combine components with previous within 6 hours
-- filter down to cohort which is not excluded
-- truncate charttime to the hour
, gcs_stg as
(
  select
    subject_id
  , gs.stay_id, gs.charttime
  , GCS
  , coalesce(GCSMotor,GCSMotorPrev) as GCSMotor
  , coalesce(GCSVerbal,GCSVerbalPrev) as GCSVerbal
  , coalesce(GCSEyes,GCSEyesPrev) as GCSEyes
  , case when coalesce(GCSMotor,GCSMotorPrev) is null then 0 else 1 end
  + case when coalesce(GCSVerbal,GCSVerbalPrev) is null then 0 else 1 end
  + case when coalesce(GCSEyes,GCSEyesPrev) is null then 0 else 1 end
    as components_measured
  , EndoTrachFlag
  from gcs gs
)
-- priority is:
--  (i) complete data, (ii) non-sedated GCS, (iii) lowest GCS, (iv) charttime
, gcs_priority as
(
  select 
      subject_id
    , stay_id
    , charttime
    , gcs
    , gcsmotor
    , gcsverbal
    , gcseyes
    , EndoTrachFlag
    , ROW_NUMBER() over
      (
        PARTITION BY stay_id, charttime
        ORDER BY components_measured DESC, endotrachflag, gcs, charttime DESC
      ) as rn
  from gcs_stg
)
select
  gs.subject_id
  , gs.stay_id
  , gs.charttime
  , GCS AS gcs
  , GCSMotor AS gcs_motor
  , GCSVerbal AS gcs_verbal
  , GCSEyes AS gcs_eyes
  , EndoTrachFlag AS gcs_unable
from gcs_priority gs
where rn = 1
;

原创文章(本站视频密码:66668888),作者:xujunzju,如若转载,请注明出处:https://zyicu.cn/?p=20172

Like (0)
Donate 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
xujunzju的头像xujunzju管理者
Previous 2025年3月29日 11:28
Next 2025年4月18日 19:37

相关推荐

发表回复

Please Login to Comment
联系我们
邮箱:
xujunzju@gmail.com
公众号:
xujunzju6174
捐赠本站
捐赠本站
SHARE
TOP