Spaces:
Sleeping
Sleeping
Jesse Liu committed on
Commit ·
2e55dbe
1
Parent(s): ed8a021
Update: Switch to new JSONL data format and fix UI issues
Browse files
- Load data from meta-llama__Llama-3.2-3B-Instruct_io.jsonl
- Remove tip about multiple admissions
- Fix markdown rendering for AI Generated Summary
- Fix crash when switching patients (improved error handling)
- Auto-select first admission when patient changes
- chatgpt.py +148 -37
- meta-llama__Llama-3.2-3B-Instruct_io.jsonl +0 -0
chatgpt.py
CHANGED
|
@@ -742,7 +742,52 @@ with gr.Blocks() as app:
|
|
| 742 |
return m.group(1) if m else ""
|
| 743 |
|
| 744 |
eval_data_by_patient = {}
|
| 745 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
files = os.listdir(summaries_dir)
|
| 747 |
pattern = re.compile(r"^subject_(\d+)_hadm_(\d+)_(model_input|summary)\.txt$")
|
| 748 |
for fname in files:
|
|
@@ -891,7 +936,6 @@ with gr.Blocks() as app:
|
|
| 891 |
# Patient Sample Selection for Evaluation
|
| 892 |
with gr.Box():
|
| 893 |
gr.Markdown("## Select Patient Sample")
|
| 894 |
-
gr.Markdown("**Tip:** Same patients may have multiple admissions - each is a separate evaluation case")
|
| 895 |
|
| 896 |
# Clinician ID selection
|
| 897 |
if eval_groups:
|
|
@@ -1054,7 +1098,10 @@ with gr.Blocks() as app:
|
|
| 1054 |
if entries:
|
| 1055 |
entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
|
| 1056 |
hadms = [entries_sorted[-1][0]] # Keep only the most recent
|
| 1057 |
-
|
|
|
|
|
|
|
|
|
|
| 1058 |
|
| 1059 |
def parse_admission_label(label):
|
| 1060 |
# Label format: "Admission {hadm} — {timestamp} (idx?)"
|
|
@@ -1258,37 +1305,73 @@ with gr.Blocks() as app:
|
|
| 1258 |
return text
|
| 1259 |
|
| 1260 |
def update_patient_eval_display(patient_id, admission_label):
|
| 1261 |
-
|
| 1262 |
-
|
| 1263 |
-
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
-
|
| 1277 |
-
|
| 1278 |
-
|
| 1279 |
-
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
-
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1292 |
|
| 1293 |
# Handle clinician ID selection to filter patient list
|
| 1294 |
if eval_groups:
|
|
@@ -1322,11 +1405,39 @@ with gr.Blocks() as app:
|
|
| 1322 |
outputs=[eval_patient_radio, group_info_display]
|
| 1323 |
)
|
| 1324 |
|
| 1325 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1326 |
eval_patient_radio.change(
|
| 1327 |
-
fn=
|
| 1328 |
inputs=[eval_patient_radio],
|
| 1329 |
-
outputs=[eval_admission_radio]
|
| 1330 |
)
|
| 1331 |
|
| 1332 |
eval_admission_radio.change(
|
|
|
|
| 742 |
return m.group(1) if m else ""
|
| 743 |
|
| 744 |
eval_data_by_patient = {}
|
| 745 |
+
|
| 746 |
+
# Load from new JSONL format first
|
| 747 |
+
jsonl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'meta-llama__Llama-3.2-3B-Instruct_io.jsonl')
|
| 748 |
+
if os.path.exists(jsonl_path):
|
| 749 |
+
print(f"Loading data from JSONL: {jsonl_path}")
|
| 750 |
+
jsonl_data = load_jsonl(jsonl_path)
|
| 751 |
+
for entry in jsonl_data:
|
| 752 |
+
# Parse patient_id format: "11318742_admission_29646478"
|
| 753 |
+
patient_id_full = entry.get('patient_id', '')
|
| 754 |
+
if '_admission_' in patient_id_full:
|
| 755 |
+
parts = patient_id_full.split('_admission_')
|
| 756 |
+
if len(parts) == 2:
|
| 757 |
+
patient_id = parts[0]
|
| 758 |
+
admission_id = parts[1]
|
| 759 |
+
else:
|
| 760 |
+
# Fallback: use full string as patient_id
|
| 761 |
+
patient_id = patient_id_full
|
| 762 |
+
admission_id = '1'
|
| 763 |
+
else:
|
| 764 |
+
patient_id = patient_id_full
|
| 765 |
+
admission_id = '1'
|
| 766 |
+
|
| 767 |
+
input_text = entry.get('input', '')
|
| 768 |
+
summary = entry.get('output', '')
|
| 769 |
+
|
| 770 |
+
# Extract admission time from input text if available
|
| 771 |
+
admittime = None
|
| 772 |
+
if input_text:
|
| 773 |
+
adm_match = re.search(r'Admission Time:\s*(\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2})', input_text)
|
| 774 |
+
if adm_match:
|
| 775 |
+
admittime = adm_match.group(1)
|
| 776 |
+
|
| 777 |
+
patient_dict = eval_data_by_patient.setdefault(patient_id, {})
|
| 778 |
+
patient_dict[admission_id] = {
|
| 779 |
+
'patient_id': patient_id,
|
| 780 |
+
'admission_id': admission_id,
|
| 781 |
+
'input_text': input_text,
|
| 782 |
+
'summary': summary,
|
| 783 |
+
'admittime': admittime,
|
| 784 |
+
'timestamp': admittime,
|
| 785 |
+
'highlights': []
|
| 786 |
+
}
|
| 787 |
+
print(f"Loaded {len(eval_data_by_patient)} patients from JSONL")
|
| 788 |
+
|
| 789 |
+
# Fallback to old format if JSONL not found
|
| 790 |
+
if not eval_data_by_patient and os.path.exists(summaries_dir):
|
| 791 |
files = os.listdir(summaries_dir)
|
| 792 |
pattern = re.compile(r"^subject_(\d+)_hadm_(\d+)_(model_input|summary)\.txt$")
|
| 793 |
for fname in files:
|
|
|
|
| 936 |
# Patient Sample Selection for Evaluation
|
| 937 |
with gr.Box():
|
| 938 |
gr.Markdown("## Select Patient Sample")
|
|
|
|
| 939 |
|
| 940 |
# Clinician ID selection
|
| 941 |
if eval_groups:
|
|
|
|
| 1098 |
if entries:
|
| 1099 |
entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
|
| 1100 |
hadms = [entries_sorted[-1][0]] # Keep only the most recent
|
| 1101 |
+
else:
|
| 1102 |
+
# If no entries, create a default one
|
| 1103 |
+
hadms = [f"Admission {list(eval_data_by_patient[patient_id].keys())[0]}"]
|
| 1104 |
+
return gr.update(choices=hadms, value=hadms[0] if hadms else None)
|
| 1105 |
|
| 1106 |
def parse_admission_label(label):
|
| 1107 |
# Label format: "Admission {hadm} — {timestamp} (idx?)"
|
|
|
|
| 1305 |
return text
|
| 1306 |
|
| 1307 |
def update_patient_eval_display(patient_id, admission_label):
|
| 1308 |
+
try:
|
| 1309 |
+
if not patient_id:
|
| 1310 |
+
return "No patient selected", "Select a patient to view AI summary...", ""
|
| 1311 |
+
|
| 1312 |
+
# If no admission_label, try to get the first one
|
| 1313 |
+
if not admission_label:
|
| 1314 |
+
pid = str(patient_id)
|
| 1315 |
+
if pid in eval_data_by_patient and eval_data_by_patient[pid]:
|
| 1316 |
+
# Get first admission
|
| 1317 |
+
first_key = list(eval_data_by_patient[pid].keys())[0]
|
| 1318 |
+
rec = eval_data_by_patient[pid][first_key]
|
| 1319 |
+
else:
|
| 1320 |
+
return "No patient selected", "Select a patient to view AI summary...", ""
|
| 1321 |
+
else:
|
| 1322 |
+
hadm_key = parse_admission_label(admission_label)
|
| 1323 |
+
if not hadm_key:
|
| 1324 |
+
# Try direct match
|
| 1325 |
+
pid = str(patient_id)
|
| 1326 |
+
if pid in eval_data_by_patient:
|
| 1327 |
+
# Try to find matching admission
|
| 1328 |
+
for key in eval_data_by_patient[pid].keys():
|
| 1329 |
+
if key == admission_label or str(key) == admission_label:
|
| 1330 |
+
hadm_key = key
|
| 1331 |
+
break
|
| 1332 |
+
if not hadm_key:
|
| 1333 |
+
# Use first available
|
| 1334 |
+
hadm_key = list(eval_data_by_patient[pid].keys())[0]
|
| 1335 |
+
else:
|
| 1336 |
+
return "No patient selected", "Select a patient to view AI summary...", ""
|
| 1337 |
+
|
| 1338 |
+
pid = str(patient_id)
|
| 1339 |
+
rec = eval_data_by_patient.get(pid, {}).get(hadm_key)
|
| 1340 |
+
if not rec:
|
| 1341 |
+
# Try to find any admission for this patient
|
| 1342 |
+
if pid in eval_data_by_patient and eval_data_by_patient[pid]:
|
| 1343 |
+
first_key = list(eval_data_by_patient[pid].keys())[0]
|
| 1344 |
+
rec = eval_data_by_patient[pid][first_key]
|
| 1345 |
+
else:
|
| 1346 |
+
return "Patient data not found", "AI summary not found", ""
|
| 1347 |
+
|
| 1348 |
+
at = rec.get('admittime') or 'None'
|
| 1349 |
+
summary_text = rec.get('summary', '') or 'No summary available'
|
| 1350 |
+
# Prepare patient input HTML content
|
| 1351 |
+
input_text = rec.get('input_text', '') or 'No input data available'
|
| 1352 |
+
|
| 1353 |
+
# Replace ICD codes and Lab IDs with descriptions
|
| 1354 |
+
input_text_with_descriptions = replace_icd_codes_and_lab_ids(input_text)
|
| 1355 |
+
|
| 1356 |
+
sanitized_input = (
|
| 1357 |
+
input_text_with_descriptions.replace('&', '&amp;')
|
| 1358 |
+
.replace('<', '&lt;')
|
| 1359 |
+
.replace('>', '&gt;')
|
| 1360 |
+
)
|
| 1361 |
+
input_html = (
|
| 1362 |
+
'<pre id="patient_input_text" '
|
| 1363 |
+
'style="max-height: 400px; overflow-y: auto; white-space: pre-wrap; '
|
| 1364 |
+
'font-family: monospace; margin: 0;">'
|
| 1365 |
+
f'{sanitized_input}'
|
| 1366 |
+
'</pre>'
|
| 1367 |
+
)
|
| 1368 |
+
# Use Markdown component for proper rendering - return raw text
|
| 1369 |
+
return input_html, summary_text, f"Admit time: {at}"
|
| 1370 |
+
except Exception as e:
|
| 1371 |
+
import traceback
|
| 1372 |
+
error_msg = f"Error loading patient data: {str(e)}\n{traceback.format_exc()}"
|
| 1373 |
+
print(f"[ERROR] {error_msg}")
|
| 1374 |
+
return "Error loading data", f"Error: {str(e)}", ""
|
| 1375 |
|
| 1376 |
# Handle clinician ID selection to filter patient list
|
| 1377 |
if eval_groups:
|
|
|
|
| 1405 |
outputs=[eval_patient_radio, group_info_display]
|
| 1406 |
)
|
| 1407 |
|
| 1408 |
+
# When patient changes, update admissions and auto-select first admission
|
| 1409 |
+
def on_patient_change(patient_id):
|
| 1410 |
+
"""When patient changes, update admissions and auto-select first admission"""
|
| 1411 |
+
if not patient_id:
|
| 1412 |
+
return gr.update(choices=[], value=None), "No patient selected", "Select a patient to view AI summary...", ""
|
| 1413 |
+
|
| 1414 |
+
# Get admissions for this patient
|
| 1415 |
+
hadms = []
|
| 1416 |
+
if patient_id in eval_data_by_patient:
|
| 1417 |
+
entries = []
|
| 1418 |
+
for hadm_key, rec in eval_data_by_patient[patient_id].items():
|
| 1419 |
+
at = rec.get('admittime') if rec.get('admittime') else 'None'
|
| 1420 |
+
base_hadm = rec.get('admission_id') or hadm_key
|
| 1421 |
+
idx = rec.get('admission_index')
|
| 1422 |
+
suffix = f" ({idx})" if idx else ""
|
| 1423 |
+
label = f"Admission {base_hadm} — {at}{suffix}"
|
| 1424 |
+
entries.append((label, at))
|
| 1425 |
+
if entries:
|
| 1426 |
+
entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
|
| 1427 |
+
hadms = [e[0] for e in entries_sorted]
|
| 1428 |
+
|
| 1429 |
+
if hadms:
|
| 1430 |
+
# Auto-select first admission and update display
|
| 1431 |
+
first_admission = hadms[0]
|
| 1432 |
+
display_result = update_patient_eval_display(patient_id, first_admission)
|
| 1433 |
+
return gr.update(choices=hadms, value=first_admission), display_result[0], display_result[1], display_result[2]
|
| 1434 |
+
else:
|
| 1435 |
+
return gr.update(choices=[], value=None), "No patient data", "Select a patient to view AI summary...", ""
|
| 1436 |
+
|
| 1437 |
eval_patient_radio.change(
|
| 1438 |
+
fn=on_patient_change,
|
| 1439 |
inputs=[eval_patient_radio],
|
| 1440 |
+
outputs=[eval_admission_radio, patient_input_display, ai_summary_display, timestamp_display]
|
| 1441 |
)
|
| 1442 |
|
| 1443 |
eval_admission_radio.change(
|
meta-llama__Llama-3.2-3B-Instruct_io.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|