Skip to content

Commit a345669

Browse files
committed
Initial Commit
1 parent 88e8823 commit a345669

File tree

2 files changed

+200
-2
lines changed

2 files changed

+200
-2
lines changed

README.md

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,15 @@
1-
# ehr_docker
2-
Docker container building scripts for MIMIC-IV, MIMIC-III, and eICU.
1+
# ehr_postgres
2+
Postgres Docker container setup scripts for MIMIC-IV (including ED), MIMIC-III, and eICU datasets.
3+
4+
## How-to-use
5+
1. Download the EHR data you want from [PhysioNet](https://physionet.org).
6+
2. Run `python build.py --mimic_iv {MIMIC_IV_PATH} --mimic_iv_ed {MIMIC_IV_ED_PATH} --mimic_iii {MIMIC_III_PATH} --eicu {EICU_PATH}` to build and run the Docker container.
7+
* NOTE: You can use each option separately.
8+
* NOTE: This script automatically removes pre-existing containers and images named `ehr_postgres`.
9+
3. Run `PGPASSWORD=postgres psql -h localhost -p 5432 -U postgres`, and enjoy!
10+
* The database names are `mimiciv`, `mimiciii`, `eicu`.
11+
12+
## NOTE
13+
- This script has been tested with the following dataset versions: MIMIC-IV v2.2, MIMIC-IV-ED v2.2, MIMIC-III v1.4, and eICU v2.0.
14+
- The whole process takes a few hours.
15+
- Each dataset requires approximately 100GB of storage space.

build.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import argparse
2+
import os
3+
import subprocess
4+
import sys
5+
6+
# Base Dockerfile contents; main() appends one COPY line per selected dataset,
# so this text must end with a newline.
# - apt-get is used rather than apt, whose command-line interface is not
#   intended for use in scripts.
# - There is deliberately no `RUN cd ~`: every RUN executes in its own shell,
#   so a bare `cd` has no effect on later instructions.
start = """FROM postgres:17.2
ENV POSTGRES_USER=postgres
ENV POSTGRES_PASSWORD=postgres
RUN apt-get update && apt-get install -y git make && rm -rf /var/lib/apt/lists/*
RUN mkdir /data

USER postgres

"""
16+
17+
# Shell script that builds the MIMIC-IV database; add_dataset() writes it to
# mimic_iv.sh after filling the single `{}` placeholder with "" or "_gz",
# which selects load.sql or load_gz.sql.
# The backslash before `|` is doubled so Python does not parse an invalid
# escape sequence; the text written to the script is still `cut -d \| -f 1`.
mimic_iv = """#!/bin/bash
set -e
echo "START BUILDING MIMIC-IV"

git clone https://github.com/MIT-LCP/mimic-code.git /tmp/mimic-code
cd /tmp/mimic-code
if ! psql -U postgres -lqt | cut -d \\| -f 1 | grep -wq mimiciv; then
createdb mimiciv
fi
psql -d mimiciv -f mimic-iv/buildmimic/postgres/create.sql
psql -d mimiciv -v ON_ERROR_STOP=1 -v mimic_data_dir=/tmp/mimic_iv -f mimic-iv/buildmimic/postgres/load{}.sql
psql -d mimiciv -v ON_ERROR_STOP=1 -v mimic_data_dir=/tmp/mimic_iv -f mimic-iv/buildmimic/postgres/constraint.sql
psql -d mimiciv -v ON_ERROR_STOP=1 -v mimic_data_dir=/tmp/mimic_iv -f mimic-iv/buildmimic/postgres/index.sql
psql -d mimiciv -v ON_ERROR_STOP=1 -f mimic-iv/buildmimic/postgres/validate.sql
cd mimic-iv/concepts_postgres
psql -d mimiciv -v ON_ERROR_STOP=1 -f postgres-make-concepts.sql

cd ~
rm -rf /tmp/mimic-code

echo "FINISH BUILDING MIMIC-IV"
"""
39+
40+
# Shell script that loads MIMIC-IV-ED into the `mimiciv` database (creating the
# database first if it does not exist); add_dataset() writes it to
# mimic_iv_ed.sh. It contains no `{}` placeholder, so the extension passed to
# str.format() is ignored.
# NOTE(review): load_gz.sql is used even when add_dataset() detected
# uncompressed .csv files -- confirm whether that case needs a different loader.
# The backslash before `|` is doubled so Python does not parse an invalid
# escape sequence; the text written to the script is still `cut -d \| -f 1`.
mimic_iv_ed = """#!/bin/bash
set -e
echo "START BUILDING MIMIC-IV-ED"

git clone https://github.com/MIT-LCP/mimic-code.git /tmp/mimic-code
cd /tmp/mimic-code
if ! psql -U postgres -lqt | cut -d \\| -f 1 | grep -wq mimiciv; then
createdb mimiciv
fi
psql -d mimiciv -f mimic-iv-ed/buildmimic/postgres/create.sql
psql -d mimiciv -v ON_ERROR_STOP=1 -v mimic_data_dir=/tmp/mimic_iv_ed -f mimic-iv-ed/buildmimic/postgres/load_gz.sql
psql -d mimiciv -v ON_ERROR_STOP=1 -v mimic_data_dir=/tmp/mimic_iv_ed -f mimic-iv-ed/buildmimic/postgres/validate.sql

cd ~
rm -rf /tmp/mimic-code

echo "FINISH BUILDING MIMIC-IV-ED"
"""
58+
59+
# Shell script that builds the MIMIC-III database; add_dataset() writes it to
# mimic_iii.sh after filling the `{}` placeholder with "" or "-gz", which
# selects the make target (`mimic` or `mimic-gz`).
# The trailing empty element makes the joined text end with a newline.
mimic_iii = "\n".join(
    (
        "#!/bin/bash",
        "set -e",
        'echo "START BUILDING MIMIC-III"',
        "",
        "git clone https://github.com/MIT-LCP/mimic-code.git /tmp/mimic-code",
        "cd /tmp/mimic-code/mimic-iii/buildmimic/postgres",
        "createdb mimiciii",
        'make mimic{} datadir="/tmp/mimic_iii" DBNAME="mimiciii"',
        "cd ../../concepts_postgres",
        'psql -d mimiciii -c "SET search_path TO mimiciii;" -f postgres-functions.sql -f postgres-make-concepts.sql',
        "",
        "cd ~",
        "rm -rf /tmp/mimic-code",
        "",
        'echo "FINISH BUILDING MIMIC-III"',
        "",
    )
)
75+
76+
77+
# Shell script that builds the eICU database; add_dataset() writes it to
# eicu.sh after filling the `{}` placeholder with "" or "-gz", which selects
# the make target (`eicu` or `eicu-gz`).
# The text starts directly with the shebang (no leading blank line) so that
# `#!/bin/bash` is the first line of the file, matching the other scripts.
eicu = """#!/bin/bash
set -e
echo "START BUILDING eICU"

git clone https://github.com/mit-lcp/eicu-code.git /tmp/eicu-code
cd /tmp/eicu-code/build-db/postgres
make initialize
make eicu{} datadir="/tmp/eicu/"

cd ~
rm -rf /tmp/eicu-code

echo "FINISH BUILDING eICU"
"""
92+
93+
# Lookup from dataset key (also used as the generated script's file name and
# the mount point under /tmp) to its shell-script template.
script_dict = dict(
    mimic_iv=mimic_iv,
    mimic_iv_ed=mimic_iv_ed,
    mimic_iii=mimic_iii,
    eicu=eicu,
)
99+
100+
101+
def pares_args(argv=None):
    """Parse the command-line options selecting which datasets to build.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``mimic_iv``, ``mimic_iv_ed``,
        ``mimic_iii`` and ``eicu``; each is the given path string, or
        ``None`` when the option was not supplied.
    """
    parser = argparse.ArgumentParser(description='Build a Dockerfile')
    parser.add_argument("--mimic_iv", type=str, help="MIMIC-IV Path")
    parser.add_argument("--mimic_iv_ed", type=str, help="MIMIC-IV-ED Path")
    parser.add_argument("--mimic_iii", type=str, help="MIMIC-III Path")
    parser.add_argument("--eicu", type=str, help="eicu Path")

    return parser.parse_args(argv)
109+
110+
def add_dataset(data_path, data_name, check_file, ext):
    """Write the init script for one dataset and describe how to wire it in.

    The dataset directory is probed for ``check_file`` to decide whether the
    data is plain CSV or gzip-compressed, the matching script template from
    ``script_dict`` is written to ``{data_name}.sh`` in the current directory,
    and the pieces needed by the Docker build/run steps are returned.

    Args:
        data_path: Host directory containing the dataset files.
        data_name: Dataset key; must be a key of ``script_dict``.
        check_file: Path (relative to ``data_path``, without extension) of a
            file whose presence identifies the dataset.
        ext: Suffix substituted into the script template when only the
            ``.csv.gz`` variant of ``check_file`` exists.

    Returns:
        Tuple ``(mount_args, write_content)``: the ``-v`` arguments for
        ``docker run`` and the ``COPY`` line to append to the Dockerfile.

    Raises:
        ValueError: If neither ``check_file + ".csv"`` nor
            ``check_file + ".csv.gz"`` exists under ``data_path``.
    """
    if os.path.exists(os.path.join(data_path, check_file + ".csv")):
        # Uncompressed data takes precedence; the template gets no suffix.
        ext = ""
    elif not os.path.exists(os.path.join(data_path, check_file + ".csv.gz")):
        raise ValueError(f"Invalid {data_name} Path")

    with open(f"{data_name}.sh", "w") as f:
        f.write(script_dict[data_name].format(ext))

    # `docker run -v` only bind-mounts a host directory when the source is an
    # absolute path, so resolve paths given relative to the current directory.
    host_path = os.path.abspath(data_path)
    mount_args = ["-v", f"{host_path}:/tmp/{data_name}"]
    write_content = f"COPY {data_name}.sh /docker-entrypoint-initdb.d/{data_name}.sh\n"

    return mount_args, write_content
125+
126+
127+
def main():
    """Generate the Dockerfile, rebuild the image, and start the container.

    Steps:
      1. Write ``Dockerfile`` from ``start`` plus one COPY line per dataset
         selected on the command line (each also gets a ``-v`` mount).
      2. Remove any existing ``ehr_postgres`` container and image.
      3. Build the image and start the container in the background.
      4. Stream the container logs until the line containing
         "PostgreSQL init process complete" appears.

    Raises:
        ValueError: From ``add_dataset`` when a dataset path is invalid.
        subprocess.CalledProcessError: If ``docker build`` or ``docker run``
            exits with a non-zero status.
        SystemExit: If the log stream ends before initialization completes.
    """
    args = pares_args()

    # (path from the CLI, dataset key, probe file, suffix used for gzip data)
    datasets = [
        (args.mimic_iv, "mimic_iv", "hosp/admissions", "_gz"),
        (args.mimic_iv_ed, "mimic_iv_ed", "edstays", "_gz"),
        (args.mimic_iii, "mimic_iii", "ADMISSIONS", "-gz"),
        (args.eicu, "eicu", "patient", "-gz"),
    ]

    run_args = []
    # The context manager closes the file even if add_dataset() raises.
    with open("Dockerfile", "w") as dockerfile:
        dockerfile.write(start)
        for data_path, data_name, check_file, ext in datasets:
            if not data_path:
                continue
            mount_args, write_content = add_dataset(data_path, data_name, check_file, ext)
            run_args.extend(mount_args)
            dockerfile.write(write_content)
    print("Dockerfile created")

    # Remove container if exists. These are best-effort: they fail harmlessly
    # when there is nothing to remove, so their exit status is not checked.
    subprocess.run(["docker", "stop", "ehr_postgres"], stderr=subprocess.DEVNULL)
    subprocess.run(["docker", "rm", "ehr_postgres"], stderr=subprocess.DEVNULL)
    subprocess.run(["docker", "image", "rm", "ehr_postgres"], stderr=subprocess.DEVNULL)

    # A failed build or run must stop the script rather than fall through to
    # following the logs of a container that does not exist.
    subprocess.run(["docker", "build", "-t", "ehr_postgres", "."], check=True)
    subprocess.run(
        [
            "docker", "run", "--name", "ehr_postgres", "-d",
            "-p", "5432:5432",
            "-e", "POSTGRES_PASSWORD=postgres",
            *run_args,
            "ehr_postgres",
        ],
        check=True,
    )

    print("Docker container created")

    initialized = False
    # Popen as a context manager closes the pipe and waits for the log
    # follower on exit, so no child process is left unreaped.
    with subprocess.Popen(
        ["docker", "logs", "-f", "ehr_postgres"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    ) as process:
        for line in process.stdout:
            sys.stdout.write(line)
            sys.stdout.flush()

            if "PostgreSQL init process complete" in line:
                print("Initialization complete!")
                initialized = True
                process.terminate()
                break

    if not initialized:
        # The log stream ended (e.g. the container stopped) without the
        # completion marker, so the database cannot be reported as ready.
        sys.exit(
            "ehr_postgres log stream ended before PostgreSQL initialization "
            "completed; inspect `docker logs ehr_postgres`."
        )

    print("EHR_POSTGRES is ready!")
183+
184+
# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)