Jesse Liu committed on
Commit
2e55dbe
·
1 Parent(s): ed8a021

Update: Switch to new JSONL data format and fix UI issues

Browse files

- Load data from meta-llama__Llama-3.2-3B-Instruct_io.jsonl
- Remove tip about multiple admissions
- Fix markdown rendering for AI Generated Summary
- Fix crash when switching patients (improved error handling)
- Auto-select first admission when patient changes

chatgpt.py CHANGED
@@ -742,7 +742,52 @@ with gr.Blocks() as app:
742
  return m.group(1) if m else ""
743
 
744
  eval_data_by_patient = {}
745
- if os.path.exists(summaries_dir):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  files = os.listdir(summaries_dir)
747
  pattern = re.compile(r"^subject_(\d+)_hadm_(\d+)_(model_input|summary)\.txt$")
748
  for fname in files:
@@ -891,7 +936,6 @@ with gr.Blocks() as app:
891
  # Patient Sample Selection for Evaluation
892
  with gr.Box():
893
  gr.Markdown("## Select Patient Sample")
894
- gr.Markdown("**Tip:** Same patients may have multiple admissions - each is a separate evaluation case")
895
 
896
  # Clinician ID selection
897
  if eval_groups:
@@ -1054,7 +1098,10 @@ with gr.Blocks() as app:
1054
  if entries:
1055
  entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
1056
  hadms = [entries_sorted[-1][0]] # Keep only the most recent
1057
- return gr.update(choices=hadms, value=None)
 
 
 
1058
 
1059
  def parse_admission_label(label):
1060
  # Label format: "Admission {hadm} — {timestamp} (idx?)"
@@ -1258,37 +1305,73 @@ with gr.Blocks() as app:
1258
  return text
1259
 
1260
  def update_patient_eval_display(patient_id, admission_label):
1261
- if not patient_id or not admission_label:
1262
- return "No patient selected", "Select a patient to view AI summary...", ""
1263
- hadm_key = parse_admission_label(admission_label)
1264
- if not hadm_key:
1265
- return "No patient selected", "Select a patient to view AI summary...", ""
1266
- pid = str(patient_id)
1267
- rec = eval_data_by_patient.get(pid, {}).get(hadm_key)
1268
- if not rec:
1269
- return "Patient data not found", "AI summary not found", ""
1270
- at = rec.get('admittime') or 'None'
1271
- summary_text = rec.get('summary', '')
1272
- # Prepare patient input HTML content
1273
- input_text = rec.get('input_text', '') or ''
1274
-
1275
- # Replace ICD codes and Lab IDs with descriptions
1276
- input_text_with_descriptions = replace_icd_codes_and_lab_ids(input_text)
1277
-
1278
- sanitized_input = (
1279
- input_text_with_descriptions.replace('&', '&')
1280
- .replace('<', '&lt;')
1281
- .replace('>', '&gt;')
1282
- )
1283
- input_html = (
1284
- '<pre id="patient_input_text" '
1285
- 'style="max-height: 400px; overflow-y: auto; white-space: pre-wrap; '
1286
- 'font-family: monospace; margin: 0;">'
1287
- f'{sanitized_input}'
1288
- '</pre>'
1289
- )
1290
- summary_display = f'<pre style="white-space: pre-wrap;">{summary_text}</pre>'
1291
- return input_html, summary_display, f"Admit time: {at}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1292
 
1293
  # Handle clinician ID selection to filter patient list
1294
  if eval_groups:
@@ -1322,11 +1405,39 @@ with gr.Blocks() as app:
1322
  outputs=[eval_patient_radio, group_info_display]
1323
  )
1324
 
1325
- # Update evaluation display when patient selection changes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1326
  eval_patient_radio.change(
1327
- fn=update_admissions_for_patient,
1328
  inputs=[eval_patient_radio],
1329
- outputs=[eval_admission_radio]
1330
  )
1331
 
1332
  eval_admission_radio.change(
 
742
  return m.group(1) if m else ""
743
 
744
  eval_data_by_patient = {}
745
+
746
+ # Load from new JSONL format first
747
+ jsonl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'meta-llama__Llama-3.2-3B-Instruct_io.jsonl')
748
+ if os.path.exists(jsonl_path):
749
+ print(f"Loading data from JSONL: {jsonl_path}")
750
+ jsonl_data = load_jsonl(jsonl_path)
751
+ for entry in jsonl_data:
752
+ # Parse patient_id format: "11318742_admission_29646478"
753
+ patient_id_full = entry.get('patient_id', '')
754
+ if '_admission_' in patient_id_full:
755
+ parts = patient_id_full.split('_admission_')
756
+ if len(parts) == 2:
757
+ patient_id = parts[0]
758
+ admission_id = parts[1]
759
+ else:
760
+ # Fallback: use full string as patient_id
761
+ patient_id = patient_id_full
762
+ admission_id = '1'
763
+ else:
764
+ patient_id = patient_id_full
765
+ admission_id = '1'
766
+
767
+ input_text = entry.get('input', '')
768
+ summary = entry.get('output', '')
769
+
770
+ # Extract admission time from input text if available
771
+ admittime = None
772
+ if input_text:
773
+ adm_match = re.search(r'Admission Time:\s*(\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2})', input_text)
774
+ if adm_match:
775
+ admittime = adm_match.group(1)
776
+
777
+ patient_dict = eval_data_by_patient.setdefault(patient_id, {})
778
+ patient_dict[admission_id] = {
779
+ 'patient_id': patient_id,
780
+ 'admission_id': admission_id,
781
+ 'input_text': input_text,
782
+ 'summary': summary,
783
+ 'admittime': admittime,
784
+ 'timestamp': admittime,
785
+ 'highlights': []
786
+ }
787
+ print(f"Loaded {len(eval_data_by_patient)} patients from JSONL")
788
+
789
+ # Fallback to old format if JSONL not found or no records were loaded from it
790
+ if not eval_data_by_patient and os.path.exists(summaries_dir):
791
  files = os.listdir(summaries_dir)
792
  pattern = re.compile(r"^subject_(\d+)_hadm_(\d+)_(model_input|summary)\.txt$")
793
  for fname in files:
 
936
  # Patient Sample Selection for Evaluation
937
  with gr.Box():
938
  gr.Markdown("## Select Patient Sample")
 
939
 
940
  # Clinician ID selection
941
  if eval_groups:
 
1098
  if entries:
1099
  entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
1100
  hadms = [entries_sorted[-1][0]] # Keep only the most recent
1101
+ else:
1102
+ # If no entries, create a default one
1103
+ hadms = [f"Admission {list(eval_data_by_patient[patient_id].keys())[0]}"]
1104
+ return gr.update(choices=hadms, value=hadms[0] if hadms else None)
1105
 
1106
  def parse_admission_label(label):
1107
  # Label format: "Admission {hadm} — {timestamp} (idx?)"
 
1305
  return text
1306
 
1307
  def update_patient_eval_display(patient_id, admission_label):
1308
+ try:
1309
+ if not patient_id:
1310
+ return "No patient selected", "Select a patient to view AI summary...", ""
1311
+
1312
+ # If no admission_label, try to get the first one
1313
+ if not admission_label:
1314
+ pid = str(patient_id)
1315
+ if pid in eval_data_by_patient and eval_data_by_patient[pid]:
1316
+ # Get first admission
1317
+ first_key = list(eval_data_by_patient[pid].keys())[0]
1318
+ rec = eval_data_by_patient[pid][first_key]
1319
+ else:
1320
+ return "No patient selected", "Select a patient to view AI summary...", ""
1321
+ else:
1322
+ hadm_key = parse_admission_label(admission_label)
1323
+ if not hadm_key:
1324
+ # Try direct match
1325
+ pid = str(patient_id)
1326
+ if pid in eval_data_by_patient:
1327
+ # Try to find matching admission
1328
+ for key in eval_data_by_patient[pid].keys():
1329
+ if key == admission_label or str(key) == admission_label:
1330
+ hadm_key = key
1331
+ break
1332
+ if not hadm_key:
1333
+ # Use first available
1334
+ hadm_key = list(eval_data_by_patient[pid].keys())[0]
1335
+ else:
1336
+ return "No patient selected", "Select a patient to view AI summary...", ""
1337
+
1338
+ pid = str(patient_id)
1339
+ rec = eval_data_by_patient.get(pid, {}).get(hadm_key)
1340
+ if not rec:
1341
+ # Try to find any admission for this patient
1342
+ if pid in eval_data_by_patient and eval_data_by_patient[pid]:
1343
+ first_key = list(eval_data_by_patient[pid].keys())[0]
1344
+ rec = eval_data_by_patient[pid][first_key]
1345
+ else:
1346
+ return "Patient data not found", "AI summary not found", ""
1347
+
1348
+ at = rec.get('admittime') or 'None'
1349
+ summary_text = rec.get('summary', '') or 'No summary available'
1350
+ # Prepare patient input HTML content
1351
+ input_text = rec.get('input_text', '') or 'No input data available'
1352
+
1353
+ # Replace ICD codes and Lab IDs with descriptions
1354
+ input_text_with_descriptions = replace_icd_codes_and_lab_ids(input_text)
1355
+
1356
+ sanitized_input = (
1357
+ input_text_with_descriptions.replace('&', '&amp;')
1358
+ .replace('<', '&lt;')
1359
+ .replace('>', '&gt;')
1360
+ )
1361
+ input_html = (
1362
+ '<pre id="patient_input_text" '
1363
+ 'style="max-height: 400px; overflow-y: auto; white-space: pre-wrap; '
1364
+ 'font-family: monospace; margin: 0;">'
1365
+ f'{sanitized_input}'
1366
+ '</pre>'
1367
+ )
1368
+ # Use Markdown component for proper rendering - return raw text
1369
+ return input_html, summary_text, f"Admit time: {at}"
1370
+ except Exception as e:
1371
+ import traceback
1372
+ error_msg = f"Error loading patient data: {str(e)}\n{traceback.format_exc()}"
1373
+ print(f"[ERROR] {error_msg}")
1374
+ return "Error loading data", f"Error: {str(e)}", ""
1375
 
1376
  # Handle clinician ID selection to filter patient list
1377
  if eval_groups:
 
1405
  outputs=[eval_patient_radio, group_info_display]
1406
  )
1407
 
1408
+ # When patient changes, update admissions and auto-select first admission
1409
+ def on_patient_change(patient_id):
1410
+ """When patient changes, update admissions and auto-select first admission"""
1411
+ if not patient_id:
1412
+ return gr.update(choices=[], value=None), "No patient selected", "Select a patient to view AI summary...", ""
1413
+
1414
+ # Get admissions for this patient
1415
+ hadms = []
1416
+ if patient_id in eval_data_by_patient:
1417
+ entries = []
1418
+ for hadm_key, rec in eval_data_by_patient[patient_id].items():
1419
+ at = rec.get('admittime') if rec.get('admittime') else 'None'
1420
+ base_hadm = rec.get('admission_id') or hadm_key
1421
+ idx = rec.get('admission_index')
1422
+ suffix = f" ({idx})" if idx else ""
1423
+ label = f"Admission {base_hadm} — {at}{suffix}"
1424
+ entries.append((label, at))
1425
+ if entries:
1426
+ entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
1427
+ hadms = [e[0] for e in entries_sorted]
1428
+
1429
+ if hadms:
1430
+ # Auto-select first admission and update display
1431
+ first_admission = hadms[0]
1432
+ display_result = update_patient_eval_display(patient_id, first_admission)
1433
+ return gr.update(choices=hadms, value=first_admission), display_result[0], display_result[1], display_result[2]
1434
+ else:
1435
+ return gr.update(choices=[], value=None), "No patient data", "Select a patient to view AI summary...", ""
1436
+
1437
  eval_patient_radio.change(
1438
+ fn=on_patient_change,
1439
  inputs=[eval_patient_radio],
1440
+ outputs=[eval_admission_radio, patient_input_display, ai_summary_display, timestamp_display]
1441
  )
1442
 
1443
  eval_admission_radio.change(
meta-llama__Llama-3.2-3B-Instruct_io.jsonl ADDED
The diff for this file is too large to render. See raw diff