Spaces:

JesseLiu
/

chatbot-mimic-notes

Sleeping

Jesse Liu committed on Dec 10, 2025

Commit

2e55dbe

1 Parent(s): ed8a021

Update: Switch to new JSONL data format and fix UI issues

- Load data from meta-llama__Llama-3.2-3B-Instruct_io.jsonl
- Remove tip about multiple admissions
- Fix markdown rendering for AI Generated Summary
- Fix crash when switching patients (improved error handling)
- Auto-select first admission when patient changes

Files changed (2) hide show

chatgpt.py +148 -37
meta-llama__Llama-3.2-3B-Instruct_io.jsonl +0 -0

chatgpt.py CHANGED Viewed

@@ -742,7 +742,52 @@ with gr.Blocks() as app:
         return m.group(1) if m else ""
     eval_data_by_patient = {}
-    if os.path.exists(summaries_dir):
         files = os.listdir(summaries_dir)
         pattern = re.compile(r"^subject_(\d+)_hadm_(\d+)_(model_input|summary)\.txt$")
         for fname in files:
@@ -891,7 +936,6 @@ with gr.Blocks() as app:
                     # Patient Sample Selection for Evaluation
                     with gr.Box():
                         gr.Markdown("## Select Patient Sample")
-                        gr.Markdown("**Tip:** Same patients may have multiple admissions - each is a separate evaluation case")
                         # Clinician ID selection
                         if eval_groups:
@@ -1054,7 +1098,10 @@ with gr.Blocks() as app:
                     if entries:
                         entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
                         hadms = [entries_sorted[-1][0]]  # Keep only the most recent
-                return gr.update(choices=hadms, value=None)
             def parse_admission_label(label):
                 # Label format: "Admission {hadm} — {timestamp} (idx?)"
@@ -1258,37 +1305,73 @@ with gr.Blocks() as app:
                 return text
             def update_patient_eval_display(patient_id, admission_label):  # removed version: returns (input HTML, summary HTML, admit-time label)
-                if not patient_id or not admission_label:  # both selections are required in this version
-                    return "No patient selected", "Select a patient to view AI summary...", ""
-                hadm_key = parse_admission_label(admission_label)  # parse_admission_label is defined outside this function
-                if not hadm_key:
-                    return "No patient selected", "Select a patient to view AI summary...", ""
-                pid = str(patient_id)  # the lookup below uses the string form of the id
-                rec = eval_data_by_patient.get(pid, {}).get(hadm_key)
-                if not rec:
-                    return "Patient data not found", "AI summary not found", ""
-                at = rec.get('admittime') or 'None'  # missing/empty admit time is shown as the literal string 'None'
-                summary_text = rec.get('summary', '')
-                # Prepare patient input HTML content
-                input_text = rec.get('input_text', '') or ''
-                # Replace ICD codes and Lab IDs with descriptions
-                input_text_with_descriptions = replace_icd_codes_and_lab_ids(input_text)  # helper defined elsewhere in the file
-                sanitized_input = (
-                    input_text_with_descriptions.replace('&', '&amp;')  # '&' is escaped first so the entities added below are not escaped again
-                    .replace('<', '&lt;')
-                    .replace('>', '&gt;')
-                )
-                input_html = (
-                    '<pre id="patient_input_text" '
-                    'style="max-height: 400px; overflow-y: auto; white-space: pre-wrap; '
-                    'font-family: monospace; margin: 0;">'
-                    f'{sanitized_input}'
-                    '</pre>'
-                )
-                summary_display = f'<pre style="white-space: pre-wrap;">{summary_text}</pre>'  # NOTE(review): summary_text is inserted without the escaping applied to the input text
-                return input_html, summary_display, f"Admit time: {at}"
             # Handle clinician ID selection to filter patient list
             if eval_groups:
@@ -1322,11 +1405,39 @@ with gr.Blocks() as app:
                     outputs=[eval_patient_radio, group_info_display]
                 )
-            # Update evaluation display when patient selection changes
             eval_patient_radio.change(
-                fn=update_admissions_for_patient,
                 inputs=[eval_patient_radio],
-                outputs=[eval_admission_radio]
             )
             eval_admission_radio.change(

         return m.group(1) if m else ""
     eval_data_by_patient = {}
+    # Load from new JSONL format first
+    jsonl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'meta-llama__Llama-3.2-3B-Instruct_io.jsonl')  # looked up in the same directory as this script
+    if os.path.exists(jsonl_path):
+        print(f"Loading data from JSONL: {jsonl_path}")
+        jsonl_data = load_jsonl(jsonl_path)  # load_jsonl is defined outside this hunk; presumably yields one dict per record -- TODO confirm
+        for entry in jsonl_data:
+            # Parse patient_id format: "11318742_admission_29646478"
+            patient_id_full = entry.get('patient_id', '')
+            if '_admission_' in patient_id_full:
+                parts = patient_id_full.split('_admission_')
+                if len(parts) == 2:  # false when the separator occurs more than once
+                    patient_id = parts[0]
+                    admission_id = parts[1]
+                else:
+                    # Fallback: use full string as patient_id
+                    patient_id = patient_id_full
+                    admission_id = '1'  # placeholder admission id for ids that cannot be split cleanly
+            else:
+                patient_id = patient_id_full
+                admission_id = '1'  # same placeholder when no '_admission_' separator is present
+            input_text = entry.get('input', '')
+            summary = entry.get('output', '')
+            # Extract admission time from input text if available
+            admittime = None
+            if input_text:
+                adm_match = re.search(r'Admission Time:\s*(\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2})', input_text)  # date and time may be separated by whitespace or 'T'
+                if adm_match:
+                    admittime = adm_match.group(1)
+            patient_dict = eval_data_by_patient.setdefault(patient_id, {})  # creates the per-patient dict the first time a patient is seen
+            patient_dict[admission_id] = {  # a later entry with the same patient/admission ids overwrites the earlier one
+                'patient_id': patient_id,
+                'admission_id': admission_id,
+                'input_text': input_text,
+                'summary': summary,
+                'admittime': admittime,
+                'timestamp': admittime,  # same value as 'admittime'
+                'highlights': []
+            }
+        print(f"Loaded {len(eval_data_by_patient)} patients from JSONL")
+    # Fallback to old format if JSONL not found
+    if not eval_data_by_patient and os.path.exists(summaries_dir):
         files = os.listdir(summaries_dir)
         pattern = re.compile(r"^subject_(\d+)_hadm_(\d+)_(model_input|summary)\.txt$")
         for fname in files:
                     # Patient Sample Selection for Evaluation
                     with gr.Box():
                         gr.Markdown("## Select Patient Sample")
                         # Clinician ID selection
                         if eval_groups:
                     if entries:
                         entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')
                         hadms = [entries_sorted[-1][0]]  # Keep only the most recent
+                    else:
+                        # If no entries, create a default one
+                        hadms = [f"Admission {list(eval_data_by_patient[patient_id].keys())[0]}"]
+                return gr.update(choices=hadms, value=hadms[0] if hadms else None)
             def parse_admission_label(label):
                 # Label format: "Admission {hadm} — {timestamp} (idx?)"
                 return text
             def update_patient_eval_display(patient_id, admission_label):  # returns (input HTML, raw summary text, admit-time label)
+                try:  # the whole lookup is guarded; any exception is reported by the except branch at the end
+                    if not patient_id:
+                        return "No patient selected", "Select a patient to view AI summary...", ""
+                    # If no admission_label, try to get the first one
+                    if not admission_label:
+                        pid = str(patient_id)
+                        if pid in eval_data_by_patient and eval_data_by_patient[pid]:
+                            # Get first admission
+                            first_key = list(eval_data_by_patient[pid].keys())[0]  # "first" means dict insertion order
+                            rec = eval_data_by_patient[pid][first_key]
+                        else:
+                            return "No patient selected", "Select a patient to view AI summary...", ""
+                    else:
+                        hadm_key = parse_admission_label(admission_label)  # parse_admission_label is defined outside this function
+                        if not hadm_key:
+                            # Try direct match
+                            pid = str(patient_id)
+                            if pid in eval_data_by_patient:
+                                # Try to find matching admission
+                                for key in eval_data_by_patient[pid].keys():
+                                    if key == admission_label or str(key) == admission_label:  # treat the label itself as a possible admission key
+                                        hadm_key = key
+                                        break
+                                if not hadm_key:
+                                    # Use first available
+                                    hadm_key = list(eval_data_by_patient[pid].keys())[0]  # raises IndexError for a patient with no admissions; caught by the except below
+                            else:
+                                return "No patient selected", "Select a patient to view AI summary...", ""
+                        pid = str(patient_id)
+                        rec = eval_data_by_patient.get(pid, {}).get(hadm_key)
+                        if not rec:
+                            # Try to find any admission for this patient
+                            if pid in eval_data_by_patient and eval_data_by_patient[pid]:
+                                first_key = list(eval_data_by_patient[pid].keys())[0]
+                                rec = eval_data_by_patient[pid][first_key]
+                            else:
+                                return "Patient data not found", "AI summary not found", ""
+                    at = rec.get('admittime') or 'None'  # missing/empty admit time is shown as the literal string 'None'
+                    summary_text = rec.get('summary', '') or 'No summary available'
+                    # Prepare patient input HTML content
+                    input_text = rec.get('input_text', '') or 'No input data available'
+                    # Replace ICD codes and Lab IDs with descriptions
+                    input_text_with_descriptions = replace_icd_codes_and_lab_ids(input_text)  # helper defined elsewhere in the file
+                    sanitized_input = (
+                        input_text_with_descriptions.replace('&', '&amp;')  # '&' is escaped first so the entities added below are not escaped again
+                        .replace('<', '&lt;')
+                        .replace('>', '&gt;')
+                    )
+                    input_html = (
+                        '<pre id="patient_input_text" '
+                        'style="max-height: 400px; overflow-y: auto; white-space: pre-wrap; '
+                        'font-family: monospace; margin: 0;">'
+                        f'{sanitized_input}'
+                        '</pre>'
+                    )
+                    # Use Markdown component for proper rendering - return raw text
+                    return input_html, summary_text, f"Admit time: {at}"  # summary is returned as-is, without HTML escaping or wrapping
+                except Exception as e:
+                    import traceback  # imported locally, only on the error path
+                    error_msg = f"Error loading patient data: {str(e)}\n{traceback.format_exc()}"
+                    print(f"[ERROR] {error_msg}")  # full traceback goes to stdout; the UI only gets the short message below
+                    return "Error loading data", f"Error: {str(e)}", ""
             # Handle clinician ID selection to filter patient list
             if eval_groups:
                     outputs=[eval_patient_radio, group_info_display]
                 )
+            # When patient changes, update admissions and auto-select first admission
+            def on_patient_change(patient_id):
+                """When patient changes, update admissions and auto-select first admission"""
+                if not patient_id:
+                    return gr.update(choices=[], value=None), "No patient selected", "Select a patient to view AI summary...", ""  # every return yields four values: admission update + three display values
+                # Get admissions for this patient
+                hadms = []
+                if patient_id in eval_data_by_patient:  # NOTE(review): patient_id is used as-is here, while update_patient_eval_display looks it up via str(patient_id)
+                    entries = []
+                    for hadm_key, rec in eval_data_by_patient[patient_id].items():
+                        at = rec.get('admittime') if rec.get('admittime') else 'None'  # missing admit time becomes the literal string 'None'
+                        base_hadm = rec.get('admission_id') or hadm_key
+                        idx = rec.get('admission_index')  # optional; only adds a " (idx)" suffix when truthy
+                        suffix = f" ({idx})" if idx else ""
+                        label = f"Admission {base_hadm} — {at}{suffix}"
+                        entries.append((label, at))
+                    if entries:
+                        entries_sorted = sorted(entries, key=lambda x: x[1] if x[1] != 'None' else '')  # string sort on admit time; 'None' is keyed as '' and therefore sorts first
+                        hadms = [e[0] for e in entries_sorted]
+                if hadms:
+                    # Auto-select first admission and update display
+                    first_admission = hadms[0]  # first label after the ascending sort above
+                    display_result = update_patient_eval_display(patient_id, first_admission)  # reuse the display function so both handlers render the same way
+                    return gr.update(choices=hadms, value=first_admission), display_result[0], display_result[1], display_result[2]
+                else:
+                    return gr.update(choices=[], value=None), "No patient data", "Select a patient to view AI summary...", ""
             eval_patient_radio.change(  # runs on_patient_change whenever the patient selection changes
+                fn=on_patient_change,
                 inputs=[eval_patient_radio],
+                outputs=[eval_admission_radio, patient_input_display, ai_summary_display, timestamp_display]  # order matches the four values returned by on_patient_change
             )
             eval_admission_radio.change(

meta-llama__Llama-3.2-3B-Instruct_io.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff