# pedagogical_builder.py - V231.18
# Server-side pedagogical templates for BuddyMath
# Adds detailed explanations WITHOUT using LLM tokens!
"""
Pedagogical Builder for BuddyMath

Follows Iron Law #4: CHILD-FRIENDLY ALWAYS

LLM returns ONLY mathematical core (100-200 tokens).
Server adds full pedagogical wrapper (0 tokens!).
"""

import logging
import re

from domain.processing_strategy import ProcessingStrategy
import gibberish_detector  # V231.25: Fix gibberish!

# Module-level logger. The builders below log through this name; it must exist
# at module scope, otherwise the fallback paths themselves raise NameError.
logger = logging.getLogger(__name__)


class LLMSchemaError(Exception):
    """Custom error for LLM output validation failures."""


# NOTE: validate_narrative_density is defined once, further down in this module.

# ==================== PEDAGOGICAL TEMPLATES ====================

PEDAGOGICAL_TEMPLATES = {
    # ========== GEOMETRY ==========
    "CIRCLE_EQUATION": {
        "intro": {
            "title": "מקום גיאומטרי או משוואת מעגל",
            "content": "בואו נפתור צעד אחר צעד ונשתמש במשוואת המעגל והמרחק לפי הצורך.",
            "tip": "חשוב להבין את המשמעות הגיאומטרית של הנתונים"
        },
        "steps": [
            {
                "title": "מה נתון לנו?",
                "uses_llm": "approach",
                "tip": "תמיד מסדרים את הנתונים קודם"
            },
            {
                "title": "איך מחשבים מרחק?",
                "content": "נזכיר את נוסחת המרחק: המרחק מנקודה $(x,y)$ לנקודה $(a,b)$ הוא $d = \\sqrt{{(x-a)^2 + (y-b)^2}}$",
                "block_math": "d = \\sqrt{(x-a)^2 + (y-b)^2}",
                "tip": "זו נוסחת פיתגורס במסווה!"
            },
            {
                "title": "חישוב הפתרון",
                "uses_llm": "steps"
            },
            {
                "title": "התשובה הסופית",
                "content": "קיבלנו את התשובה הסופית!",
                "uses_llm": "solution"
            }
        ],
        "closing": "כל הכבוד! הצלחנו לפתור את השאלה המורכבת הזו! 🎉"
    },
    "DERIVATIVE_QUOTIENT": {
        "intro": {
            "title": "נגזרת של מנה",
            "content": "כשיש לנו פונקציה שהיא מנה (חילוק) של שתי פונקציות, נשתמש בכלל המנה.",
            "tip": "כלל המנה: $(\\frac{u}{v})' = \\frac{u'v - uv'}{v^2}$"
        },
        "steps": [
            {
                "title": "זיהוי המונה והמכנה",
                "template": "המונה: $u = {numerator}$\nהמכנה: $v = {denominator}$"
            },
            {
                "title": "נגזרת המונה",
                "content": "נמצא את $u'$ (נגזרת המונה):",
                "uses_llm": "u_prime"
            },
            {
                "title": "נגזרת המכנה",
                "content": "נמצא את $v'$ (נגזרת המכנה):",
                "uses_llm": "v_prime"
            },
            {
                "title": "נציב בכלל המנה",
                "content": "עכשיו נציב בנוסחה: $(\\frac{u}{v})' = \\frac{u'v - uv'}{v^2}$",
                "uses_llm": "derivative",
                "tip": "שים לב לסדר: u'v **פחות** uv'"
            }
        ],
        "closing": "מעולה! שלטת בכלל המנה! 💪"
    },
    "LINEAR_EQUATION": {
        "intro": {
            "title": "פתרון משוואה לינארית",
            "content": "משוואה לינארית היא משוואה שבה המשתנה מופיע בחזקה 1 בלבד. המטרה: לבודד את x.",
            "tip": "כלל הזהב: מה שעושים בצד אחד, עושים גם בצד השני!"
        },
        "steps": [
            {
                "title": "המשוואה שלנו",
                "template": "נתון: ${equation}$"
            },
            {
                "title": "פתרון שלב אחר שלב",
                "uses_llm": "steps",
                "tip": "בכל שלב, נקרב את x לבידוד"
            },
            {
                "title": "התשובה",
                "template": "הפתרון: $x = {solution}$"
            }
        ],
        "closing": "יפה! פתרת את המשוואה! ✅"
    },
    # ========== GENERAL / ALGEBRA ==========
    "GENERAL": {
        "intro": {
            "title": "ניתוח השאלה",
            "content": "בואו נראה מה נתון לנו ומה צריך למצוא. נפרק את הבעיה לשלבים פשוטים.",
            "tip": "קריאה נכונה של השאלה היא 50% מהפתרון!"
        },
        "steps": [
            {
                "title": "מה נתון?",
                "content": "נסדר את הנתונים והמשוואות בצורה ברורה.",
                "uses_llm": "approach"  # Use approach/strategy as step 1
            },
            {
                "title": "דרך הפתרון",
                "uses_llm": "steps",
                "tip": "נפתור שלב אחר שלב בצורה מסודרת"
            },
            {
                "title": "תשובה סופית",
                "template": "הגענו לתוצאה: {solution}",
                "uses_llm": "solution"
            }
        ],
        "closing": "מצוין! סיימנו את הסעיף הזה בהצלחה."
    },
    # Alias for Rational Function (uses General structure but refined)
    "RATIONAL_FUNCTION": {
        "intro": {
            "title": "חקירת פונקציה רציונלית",
            "content": "פונקציה רציונלית היא מנה של פולינומים. נבדוק תחום הגדרה, אסימפטוטות ונקודות מיוחדות.",
            "tip": "חשוב לבדוק מתי המכנה מתאפס!"
        },
        "steps": [
            {
                "title": "ניתוח הפונקציה",
                "content": "נסתכל על המונה והמכנה ונראה אם אפשר לפשט.",
                "uses_llm": "approach"
            },
            {
                "title": "הפתרון המלא",
                "uses_llm": "steps",
                "tip": "עבודה מסודרת מונעת טעויות חישוב"
            },
            {
                "title": "סיכום",
                "uses_llm": "solution"
            }
        ],
        "closing": "כל הכבוד! חקירה יסודית היא המפתח."
    },
    # ========== TRIGONOMETRY ==========
    "TRIGONOMETRY": {
        "intro": {
            "title": "חשבון טריגונומטרי",
            "content": "נשתמש בזהויות טריגונומטריות ובתכונות המשולש כדי לפתור.",
            "tip": "זכור: sin²x + cos²x = 1"
        },
        "steps": [
            {
                "title": "זיהוי המצב",
                "content": "נבדוק אילו זווית וצלעות נתונות לנו.",
                "uses_llm": "approach"
            },
            {
                "title": "ביצוע החישוב",
                "uses_llm": "steps",
                "tip": "שימו לב יחידות מעלות/רדיאנים!"
            },
            {
                "title": "התשובה",
                "template": "התוצאה: {solution}",
                "uses_llm": "solution"
            }
        ],
        "closing": "מצוין! הטריגונומטריה בידינו! 📐"
    },
    # Alias for basic trig
    "TRIG_BASIC": {
        "intro": {
            "title": "טריגונומטריה בסיסית",
            "content": "חישוב זוויות וצלעות במשולש ישר זווית."
        },
        "steps": [
            {"title": "פתרון", "uses_llm": "steps"},
            {"title": "תשובה", "uses_llm": "solution"}
        ],
        "closing": "יופי!"
    }
}

# ==================== SHARED PATTERNS ====================

# Any character in the Unicode Hebrew block.
_HEBREW_CHAR_RE = re.compile(r'[\u0590-\u05FF]')
# A run of Hebrew letters together with the whitespace / light punctuation around them.
_HEBREW_CHUNK_RE = re.compile(r'[\u0590-\u05FF\s\.\,\:\-]+')
# Three Hebrew words in a row, separated by whitespace.
_HEBREW_PHRASE_RE = re.compile(r'[\u0590-\u05FF]+\s+[\u0590-\u05FF]+\s+[\u0590-\u05FF]+')
# A $$...$$ display-math block (non-greedy, may span lines).
_DISPLAY_MATH_RE = re.compile(r'\$\$(.+?)\$\$', re.DOTALL)
# Either a $$...$$ or a $...$ span; group 1 is the delimiter, group 2 the body.
_MATH_SPAN_RE = re.compile(r'(\$\$?)(.+?)\1', re.DOTALL)
# Programmatic keywords that must never show up in a narrative explanation.
_FORBIDDEN_KEYWORD_RE = re.compile(r'\b(import|def|class|lambda)\b')
# Longest explanation accepted by the kill switch (longer = runaway LLM output).
_MAX_NARRATIVE_LENGTH = 400

# English math words -> LaTeX / Hebrew-convention replacements (case-insensitive).
# The (?<!\\) guard keeps existing commands such as \angle, \triangle and \deg intact.
_ENGLISH_TERM_FIXES = (
    (re.compile(r'(?<!\\)\bAngle\b', re.IGNORECASE), r'\\angle'),
    (re.compile(r'(?<!\\)\bTriangle\b', re.IGNORECASE), r'\\triangle'),
    (re.compile(r'(?<!\\)\bDeg\b', re.IGNORECASE), r'^{\\circ}'),
    (re.compile(r'(?<!\\)\bArea\b', re.IGNORECASE), 'S'),
)

# Keys whose presence in a dict-shaped LLM output marks it as renderable by the generic builder.
_GENERIC_SHAPE_KEYS = ("solution_markdown", "steps", "chain_of_thought", "sections")


# ==================== PROOF-GRAPH PROJECTION ====================

def build_pedagogical_response(
    topic_id: str,
    llm_output: dict,
    data_anchor: dict,
    custom_title: str = None,  # V260.3: Allow override
    proof_graph=None,  # V1.1: Immutable ProofGraph
    processing_strategy: ProcessingStrategy = None  # V5.8.0: Intent Contract
) -> dict:
    """
    V4.2 (Behavioral Firewall): Projection-Only Builder.

    When ``proof_graph`` has steps, each step's math is taken from the graph and
    only the explanation text is taken from the LLM. Without a usable graph the
    LLM output is rendered by ``_build_generic_response``.

    Args:
        topic_id: Topic identifier; only used in the diagnostic print.
        llm_output: LLM payload. Normally a dict; a list of step dicts is also
            accepted (it is treated as the step explanations).
        data_anchor: Accepted for interface compatibility; not read here.
        custom_title: Optional override for the section title.
        proof_graph: Object exposing ``steps``; each step exposes ``step_id``,
            ``math_content`` and ``logic_description``.
        processing_strategy: Strategy used to decide how a missing graph is handled.

    Returns:
        A response dict with ``sections``, ``final_answer``, ``teacher_closing``,
        ``approach`` and ``teacher_summary`` (plus ``investigation`` when present).
    """
    try:
        print(f"🧱 [V4.2] Projection-Only Mode: topic={topic_id}, ProofGraph={proof_graph is not None}")
        print(f"DEBUG [PRE-SCRUB]: LLM generated raw narrative: {llm_output}")

        if not proof_graph or not proof_graph.steps:
            return _respond_without_proof_graph(llm_output, custom_title, processing_strategy)

        # Metadata keys (approach, summary, ...) only exist on dict payloads.
        meta = llm_output if isinstance(llm_output, dict) else {}

        # 1. Map ProofGraph to Immutable Truth Nodes
        sympy_nodes = [
            {
                "step_id": step.step_id,
                "block_math": step.math_content,
                "title": step.logic_description or f"שלב {step.step_id}"
            }
            for step in proof_graph.steps
        ]

        # 2. Extract explanations from LLM (The "Skin") - V4.2.7 supports list or dict
        llm_explanations = _collect_llm_explanations(llm_output, sympy_nodes)

        # V5.8.2: Layer 2 Runtime Validator (The Kill Switch)
        for node in llm_explanations:
            text = node.get("explanation_text", "")
            if not validate_narrative_density(text):
                print(f"🛑 [V5.8.2] KILL SWITCH TRIGGERED on text: {text}")
                raise LLMSchemaError(
                    "NARRATIVE_OVERFLOW: Explanation is too dense or contains forbidden math/English characters."
                )

        # 3. Deterministic Merge (Iron Law) - V4.2.7: explanation_text
        merged_steps = merge_and_verify_explanations(sympy_nodes, llm_explanations)

        # 5. UI Projection (Hard Decoupling V4.2.10)
        # NOTE(review): the per-step "title" computed above is not projected into
        # the UI steps; kept as-is, confirm whether that is intended.
        ui_steps = []
        for position, node in enumerate(merged_steps, start=1):
            # V8.6.2: Ensure LaTeX preserved in content_mixed (removed aggressive $ and \ stripping)
            explanation = sanitize_math_text(node["explanation_text"])
            math_content = node["block_math"]
            ui_steps.append({
                "step_id": node["step_id"],
                "step_number": position,
                "explanation_text": explanation,
                "math_artifact": {
                    "type": "equation",
                    "latex": math_content,
                    "table_data": ""
                },
                # We keep these for one more version as 'Ghost Keys' for extreme backward
                # compatibility, but they now mirror the structured data perfectly.
                "content_mixed": explanation,
                "block_math": math_content
            })

        # V8.6: Inject 'approach' as Step 0 to ensure Flutter displays it
        approach = meta.get("approach")
        if approach and isinstance(approach, str):
            ui_steps.insert(0, _build_approach_step(approach))

        # V8.6.2: Final check on teacher_summary from LLM
        summary = meta.get("teacher_summary") or meta.get("summary")
        final_math = merged_steps[-1]["block_math"] if merged_steps else ""

        response = {
            "sections": [{
                "section_title": custom_title or "פתרון מלא ומדויק",
                "steps": ui_steps,
                "section_result": final_math
            }],
            "final_answer": final_math,
            "teacher_closing": meta.get("teacher_closing", "כל הכבוד על פתרון התרגיל! 🎉"),
            "approach": approach,
            "teacher_summary": summary
        }
        _attach_investigation(response, meta)
        return apply_cognitive_load_limiter(response)
    except Exception as e:
        # NOTE(review): this also absorbs the LLMSchemaError raised above for
        # STRICT_SYMBOLIC / kill-switch violations, so those end up as a generic
        # response rather than an error. Confirm that this is the intended policy.
        logger.error("🚨 [V8.5 RESILIENCE] Builder Crash: %s. Falling back to generic.", e)
        return _build_generic_response(llm_output, custom_title=custom_title)


def _respond_without_proof_graph(llm_output, custom_title, processing_strategy) -> dict:
    """Handle a missing/empty ProofGraph: raise for STRICT_SYMBOLIC, else render generically."""
    # V5.8.0: Enforce Intent Matrix! STRICT_SYMBOLIC without a graph is an error.
    if processing_strategy == ProcessingStrategy.STRICT_SYMBOLIC:
        print("🛑 [V5.8.0] STRICT_SYMBOLIC Violation: No ProofGraph provided. Blocking response.")
        raise LLMSchemaError(
            "Truth Authority Violation: STRICT_SYMBOLIC strategy requires a verified ProofGraph."
        )

    if processing_strategy == ProcessingStrategy.HEURISTIC_DEDUCTION:
        print("✅ [V7.3] HEURISTIC_DEDUCTION detected. Bypassing Truth Authority.")
    elif isinstance(llm_output, list) and llm_output:
        print("✅ [V7.3] Hybrid Navigation detected (List Segment). Bypassing ProofGraph requirement.")
    elif not (isinstance(llm_output, dict) and any(key in llm_output for key in _GENERIC_SHAPE_KEYS)):
        # Unrecognised structure: still rendered generically, but worth a warning.
        logger.warning(
            "⚠️ [V8.5] Truth Authority Violation: Falling back to generic due to invalid structure: %s",
            llm_output,
        )
    return _build_generic_response(llm_output, custom_title=custom_title)


def _collect_llm_explanations(llm_output, sympy_nodes: list) -> list:
    """Return normalized copies of the LLM step explanations (default text if there are none)."""
    if isinstance(llm_output, list):
        raw_steps = list(llm_output)
    else:
        meta = llm_output if isinstance(llm_output, dict) else {}
        found = meta.get("steps_explanations", meta.get("steps", []))
        raw_steps = list(found) if found else []
        # V5.8.2: Support parsing nested 'sections' from the LLM output
        if not raw_steps and "sections" in meta:
            for section in meta["sections"]:
                if "steps" in section:
                    raw_steps.extend(section["steps"])

    if not raw_steps:
        # Internal Fallback: If LLM failed, use generic text to preserve UI
        return [
            {"step_id": node["step_id"], "explanation_text": "נבצע את החישוב המתמטי"}
            for node in sympy_nodes
        ]

    # V276.1: Normalize explanations to handle structured content/type keys.
    # Work on shallow copies so the caller's payload is not modified.
    normalized = []
    for entry in raw_steps:
        if not isinstance(entry, dict):
            raise TypeError(f"LLM step explanation must be a dict, got {type(entry).__name__}")
        node = dict(entry)
        if not node.get("explanation_text"):
            node["explanation_text"] = node.get(
                "content_mixed", node.get("content", node.get("explanation", ""))
            )
        # Unpack dict if still found
        inner = node["explanation_text"]
        if isinstance(inner, dict):
            node["explanation_text"] = inner.get("content", inner.get("text", str(inner)))
        normalized.append(node)
    return normalized


def _build_approach_step(approach: str) -> dict:
    """Build the synthetic 'Step 0' that shows the LLM's approach text."""
    approach_text = sanitize_math_text(approach)
    return {
        "step_id": 0,
        "step_number": 0,
        "title": "איך ניגשים לזה? 🧭",
        "explanation_text": approach_text,
        "content_mixed": approach_text,
        "math_artifact": {"type": "equation", "latex": ""},
        "block_math": ""
    }


def _attach_investigation(response: dict, meta: dict) -> None:
    """V260.5: Copy investigation data (used by the table UI) from the LLM payload, if any."""
    if "investigation" in meta:
        response["investigation"] = meta["investigation"]
    elif "investigation_table" in meta:
        response["investigation"] = meta["investigation_table"]


def apply_cognitive_load_limiter(response: dict) -> dict:
    """
    V1.1: Cognitive Load Limiter. Ensures steps are revealed gradually.

    Counting across all sections, the first three steps are tagged
    ``disclosure_state = "VISIBLE"`` and every later step ``"HIDDEN"``.
    The response is modified in place and also returned.
    """
    if "sections" not in response:
        return response

    step_count = 0
    for section in response["sections"]:
        for step in section.get("steps", ()):
            step_count += 1
            # V1.1 Rule: If more than 3 steps, flag the rest for gradual disclosure
            step["disclosure_state"] = "HIDDEN" if step_count > 3 else "VISIBLE"
    return response


def validate_narrative_density(text: str) -> bool:
    """
    V5.8.2: Layer 2 Runtime Validator (Kill Switch).

    V8.5: RESILIENCE - Relaxed to allow math symbols and English letters.
    Returns False only when the text is longer than 400 characters (runaway LLM)
    or contains one of the keywords ``import``, ``def``, ``class``, ``lambda``.
    """
    if len(text) > _MAX_NARRATIVE_LENGTH:
        return False
    return _FORBIDDEN_KEYWORD_RE.search(text) is None


def merge_and_verify_explanations(sympy_nodes: list[dict], llm_explanations: list[dict]) -> list[dict]:
    """
    V2.5.3: The Swiss Watch Maneuver.
    V3.1.3: Hardened Merge Phase with robust guards.
    V5.8.2: Robust Merge (Option 1) to ignore LLM self-referencing narrative drift.

    Merges Immutable SymPy math (Truth) with LLM explanations (Skin).

    For every node in ``sympy_nodes`` the LAST entry of ``llm_explanations`` with
    the same ``step_id`` (and an ``explanation_text`` key) supplies the text; a
    node without a match gets a default sentence.

    Raises:
        LLMSchemaError: if a chosen explanation fails ``validate_narrative_density``
            or if any other error occurs while merging.
    """
    final_nodes = []  # V3.1.3: Mandatory initialization
    try:
        for step in sympy_nodes:
            sid = step["step_id"]
            # Find all LLM explanations for this step
            candidates = [
                item for item in llm_explanations
                if item.get("step_id") == sid and "explanation_text" in item
            ]
            if not candidates:
                # If LLM completely missed a step, fallback to generic
                final_nodes.append({**step, "explanation_text": "נבצע את החישוב המתמטי."})
                continue

            # Take the last candidate to ignore preamble/meta-commentary drift
            explanation_text = candidates[-1]["explanation_text"]

            # V6 Narrative Drift Telemetry (log only, never blocks)
            if step.get("allowed_concepts"):
                # simple word split ignoring short connectives
                words = [w for w in re.split(r'\s+', explanation_text) if len(w) > 2]
                allowed_words = set()
                for concept in step["allowed_concepts"]:
                    allowed_words.update(concept.split())
                unauthorized_words = [w for w in words if w not in allowed_words]
                drift_percentage = (len(unauthorized_words) / max(len(words), 1)) * 100
                if drift_percentage > 50.0:
                    logger.warning(f"⚠️ [V6 TELEMETRY] Drift Warning: {drift_percentage:.1f}% concept drift in Step {sid} (unauthorized: {unauthorized_words[:3]}...)")

            # V5.8.2 Kill Switch Validator Call
            if not validate_narrative_density(explanation_text):
                msg = f"NARRATIVE_OVERFLOW: Explanation rejected by Kill Switch: {explanation_text[:20]}..."
                print(f"🚨 [V5.8.2] {msg}")
                raise LLMSchemaError("NARRATIVE_OVERFLOW")

            final_nodes.append({**step, "explanation_text": explanation_text})
        return final_nodes
    except LLMSchemaError:
        raise
    except Exception as e:
        logger.error(f"🚨 [V3.1.3] Merge Phase Failed: {e}")
        # Surface every merge failure to the caller as a schema error.
        raise LLMSchemaError(f"Merge failure: {str(e)}") from e


# ==================== TEMPLATE BUILDER ====================

def _normalize_llm_keys(llm_output: dict) -> dict:
    """V275.3: Map alternative LLM output keys to expected template keys.

    Returns a shallow copy in which ``solution`` falls back to ``equation`` and
    ``approach`` falls back to ``strategy`` when the expected key is missing.
    E.g., CIRCLE_EQUATION returns 'equation' but GENERAL template expects 'solution'.
    """
    result = dict(llm_output)
    # Map equation -> solution if solution is missing
    if "solution" not in result and "equation" in result:
        result["solution"] = result["equation"]
    # Map approach alternatives
    if "approach" not in result and "strategy" in result:
        result["approach"] = result["strategy"]
    return result


def _build_template_response(topic_id: str, llm_output: dict, data_anchor: dict) -> dict:
    """Build response using topic-specific template.

    Args:
        topic_id: Key into ``PEDAGOGICAL_TEMPLATES``.
        llm_output: Dict of values produced by the LLM.
        data_anchor: Extra values for ``template`` substitution; ``None`` is
            treated as empty. ``llm_output`` wins on key clashes.

    Raises:
        KeyError: if ``topic_id`` has no template.
    """
    template = PEDAGOGICAL_TEMPLATES[topic_id]
    # V275.3: Normalize LLM keys so templates always find what they need
    llm_output = _normalize_llm_keys(llm_output)
    format_values = {**(data_anchor or {}), **llm_output}

    steps = []
    section_data = {
        "section_title": "פתרון מלא",
        "steps": steps,
        # V262.0: Per-section result
        "section_result": llm_output.get("equation") or llm_output.get("solution") or llm_output.get("derivative")
    }
    response = {
        "sections": [section_data],
        "final_answer": section_data["section_result"],
        "teacher_closing": template.get("closing", "כל הכבוד! 🎉"),
        "teacher_summary": llm_output.get("teacher_summary")  # V262.2: Propagate explicit summary
    }

    # Add intro step. It is numbered 0 and must not consume a regular step number,
    # otherwise the visible numbering would run 0, 2, 3, ...
    intro_offset = 0
    if "intro" in template:
        intro = template["intro"]
        steps.append({
            "step_number": 0,
            "title": intro["title"],
            "content_mixed": intro["content"],
            "teacher_tip": intro.get("tip", "")
        })
        intro_offset = 1

    # Add main steps
    for step_template in template["steps"]:
        # V275.2 FIX: Handle unpacking of 'steps' list from llm_output gracefully!
        if step_template.get("uses_llm") == "steps" and isinstance(llm_output.get("steps"), list):
            for llm_step in llm_output["steps"]:
                number = len(steps) - intro_offset + 1
                # V275.3: Check multiple content key names (different micro-prompts use different schemas)
                content = llm_step.get("content_mixed", llm_step.get("content", llm_step.get("explanation", "")))
                steps.append({
                    "step_number": number,
                    "title": llm_step.get("title", f"שלב {number}"),
                    "content_mixed": sanitize_math_text(content),
                    "block_math": llm_step.get("block_math", llm_step.get("result", "")),
                    "teacher_tip": llm_step.get("teacher_tip", step_template.get("tip"))
                })
            continue

        step = {
            "step_number": len(steps) - intro_offset + 1,
            "title": step_template["title"]
        }

        # Fill content
        if "template" in step_template:
            # Template with data substitution (merge with llm_output to prevent KeyError)
            try:
                content = step_template["template"].format(**format_values)
            except KeyError:
                # V275.3: Strip unresolved {variable} placeholders instead of showing them raw
                content = re.sub(r'\{\w+\}', '', step_template["template"]).strip()
            step["content_mixed"] = content
        if "content" in step_template:
            step["content_mixed"] = step_template["content"]
        if "block_math" in step_template:
            step["block_math"] = step_template["block_math"]

        llm_key = step_template.get("uses_llm")
        if llm_key is not None and llm_key in llm_output:
            llm_text = sanitize_math_text(str(llm_output[llm_key]))
            if llm_key in ("solution", "approach"):
                # Solutions and approaches usually have Hebrew, put them in content to
                # prevent flutter crash. If there's already content, append to it.
                # NOTE(review): a template that already embeds {solution} ends up showing
                # the value twice; left unchanged, confirm whether that is wanted.
                if step.get("content_mixed"):
                    step["content_mixed"] += "\n" + llm_text
                else:
                    step["content_mixed"] = llm_text
            else:
                step["block_math"] = llm_text

        if "tip" in step_template:
            step["teacher_tip"] = step_template["tip"]
        steps.append(step)

    return response


# ==================== TEXT SANITIZATION ====================

def _unwrap_hebrew_display_block(match) -> str:
    """Return a $$...$$ block as plain text when its body is really a Hebrew sentence."""
    content = match.group(1).strip()
    # Count Hebrew chars vs total (spaces ignored)
    hebrew_chars = len(_HEBREW_CHAR_RE.findall(content))
    total_chars = len(content.replace(' ', ''))
    if total_chars == 0:
        return ''  # Empty block, remove it
    hebrew_ratio = hebrew_chars / total_chars
    # Heuristic 1: >25% Hebrew with enough chars = text paragraph
    if hebrew_ratio > 0.25 and hebrew_chars > 8:
        print(f"🧹 [SANITIZE] Unwrapped Hebrew-in-math block ({hebrew_chars} Hebrew chars, ratio={hebrew_ratio:.1%})")
        return content  # Return as plain text without $$
    # Heuristic 2: Has 3+ consecutive Hebrew words (even if ratio is low)
    if _HEBREW_PHRASE_RE.search(content):
        print("🧹 [SANITIZE] Unwrapped Hebrew-in-math (consecutive Hebrew words detected)")
        return content
    return match.group(0)  # Keep as-is for real math


def _split_display_block_newlines(match) -> str:
    r"""Split a $$...$$ block on ``\\`` / ``\newline`` into one $$...$$ block per line."""
    block = match.group(1)
    if r'\begin{' in block:
        # Leave environments like \begin{cases} alone, they support \\
        return match.group(0)
    parts = re.split(r'\\\\|\\newline', block)
    # V280.3: Ensure we don't strip the outer $$ markers when splitting blocks!
    joined = '$$\n$$'.join(p.strip() for p in parts if p.strip())
    return f"$${joined}$$" if joined else ""


def sanitize_math_text(text: str) -> str:
    r"""
    V231.23: Remove English math artifacts and enforce Hebrew/Latex conventions.
    Forces 'Angle' -> '\angle', 'Triangle' -> '\triangle', 'Deg' -> '^{\circ}',
    'Area' -> 'S' (whole words, case-insensitive, existing LaTeX commands untouched).
    V260.4: Also runs auto_fix_gibberish (reversed Hebrew, broken Latex).

    Falsy input (``None``, ``""``) is returned unchanged.
    """
    if not text:
        return text

    # V260.4: First, fix structural gibberish (reversed Hebrew, broken LaTeX)
    text = gibberish_detector.auto_fix_gibberish(text)

    # V275.3: Collapse runs of 3+ dollars to $$ EARLY (before any block processing)
    text = re.sub(r'\${3,}', '$$', text)

    # V275.4: CRITICAL - Detect and unwrap $$Hebrew paragraph$$ blocks.
    # The LLM wraps Hebrew explanations in $$...$$ which renders as garbled "mirror text".
    text = _DISPLAY_MATH_RE.sub(_unwrap_hebrew_display_block, text)

    # V231.25: Fix corrupted LaTeX escapes (form feed \f, invalid \3)
    text = text.replace('\x0c', r'\f')
    text = re.sub(r'\\(\d)', r'\1', text)

    # V275.2: Fix double backslash newlines inside $$...$$ which crash flutter_math_fork
    text = _DISPLAY_MATH_RE.sub(_split_display_block_newlines, text)

    # English geometrical terms and "Area" -> S. \b is a real word boundary here so
    # substrings (e.g. "rectangle") are not touched.
    for pattern, replacement in _ENGLISH_TERM_FIXES:
        text = pattern.sub(replacement, text)

    # V262.1: Auto-wrap Hebrew inside LaTeX blocks (The "Escaping Lines" Fix)
    return _auto_wrap_hebrew_in_latex(text)


def _auto_wrap_hebrew_in_latex(text: str) -> str:
    r"""
    Scans for Hebrew characters inside $$...$$ or $...$ blocks.
    If found, wraps them in \text{...} to prevent rendering crashes.

    Only chunks that actually contain a Hebrew character are wrapped, and a chunk
    sitting directly after ``\text{`` is left alone, so applying the function to
    its own output does not wrap twice. Text outside math spans is untouched.
    """
    if not text:
        return text

    def wrap_span(span_match):
        delimiter, body = span_match.group(1), span_match.group(2)
        if not _HEBREW_CHAR_RE.search(body):
            return span_match.group(0)

        def wrap_chunk(chunk_match):
            chunk = chunk_match.group(0)
            if not _HEBREW_CHAR_RE.search(chunk):
                return chunk  # whitespace / punctuation only: this is math, not text
            if body.endswith('\\text{', 0, chunk_match.start()):
                return chunk  # Already wrapped (naive check)
            return f"\\text{{{chunk}}}"

        return f"{delimiter}{_HEBREW_CHUNK_RE.sub(wrap_chunk, body)}{delimiter}"

    return _MATH_SPAN_RE.sub(wrap_span, text)


# ==================== GENERIC BUILDER ====================

def _extract_step_content(step):
    """Pull the display text out of one LLM step (dict) or stringify anything else."""
    if not isinstance(step, dict):
        return str(step)
    # Check multiple content key names (different micro-prompts use different schemas)
    content = step.get(
        "content_mixed",
        step.get("content", step.get("explanation", step.get("explanation_text", "")))
    )
    # V275.5: If content is still a dict (e.g. from structured JSON fragments), extract text
    if isinstance(content, dict):
        content = content.get("text", content.get("content", str(content)))
    return content


def _build_generic_response(llm_output: dict, custom_title: str = None) -> dict:
    """
    V231.20: Rich UI formatter — restores 'Old Look' with green box and mixed text/math.

    Renders the LLM output without a ProofGraph. Accepted shapes, in priority order:
    a dict with ``solution_markdown``; a list of steps or a dict whose ``steps`` is
    a list; a dict with ``chain_of_thought``; anything else becomes a single step.
    Metadata (final answer, approach, closing, summary, investigation) is read only
    when ``llm_output`` is a dict.
    """
    meta = llm_output if isinstance(llm_output, dict) else {}
    steps = []

    if "solution_markdown" in meta:
        # V4.3 Unified Markdown Path
        markdown = sanitize_math_text(str(meta["solution_markdown"]))
        steps.append({
            "step_id": 1,
            "step_number": 1,
            "title": "פתרון מלא",
            "explanation_text": markdown,
            "content_mixed": markdown,
            "math_artifact": {"type": "equation", "latex": ""},
            "is_unified_markdown": True
        })
    elif isinstance(llm_output, list) or isinstance(meta.get("steps"), list):
        raw_steps = llm_output if isinstance(llm_output, list) else meta["steps"]
        for number, raw in enumerate(raw_steps, 1):
            is_dict = isinstance(raw, dict)
            content = _extract_step_content(raw)
            if isinstance(content, str):
                content = sanitize_math_text(content)
            math_latex = ""
            if is_dict:
                math_latex = raw.get("math_latex", raw.get("block_math", raw.get("result", "")))
            steps.append({
                "step_id": raw.get("step_id", number) if is_dict else number,
                "step_number": number,
                "title": raw.get("title", f"שלב {number}") if is_dict else f"שלב {number}",
                "explanation_text": content,
                "content_mixed": content,
                "math_artifact": {
                    "type": "equation",
                    "latex": math_latex
                },
                "block_math": math_latex,
                "teacher_tip": raw.get("teacher_tip") if is_dict else None
            })
    elif "chain_of_thought" in meta:
        # Fallback for old models
        cot = sanitize_math_text(str(meta["chain_of_thought"]))
        steps.append({
            "step_id": 1,
            "step_number": 1,
            "title": "דרך הפתרון",
            "explanation_text": cot,
            "content_mixed": cot,
            "math_artifact": {"type": "equation", "latex": ""}
        })
    else:
        # Last resort - treat the whole payload as a single content block
        content = _extract_step_content(llm_output)
        if isinstance(content, str):
            content = sanitize_math_text(content)
        steps.append({
            "step_id": 1,
            "step_number": 1,
            "title": "הפתרון",
            "explanation_text": content,
            "content_mixed": content,
            "math_artifact": {"type": "equation", "latex": ""}
        })

    # Extract final answer with improved fallback logic (V261.16)
    final_answer = (
        meta.get("final_answer") or
        meta.get("equation") or
        meta.get("solution") or
        meta.get("derivative") or
        meta.get("integral") or
        meta.get("limit") or
        meta.get("x_intercepts") or
        meta.get("min_max_points")
    )
    # If it's a list or dict (e.g. points), convert to string representation
    if isinstance(final_answer, (list, dict)):
        final_answer = str(final_answer)
    if not final_answer:
        final_answer = "ראה שלבים"

    # V8.6: Inject 'approach' as Step 0
    approach = meta.get("approach")
    if approach and isinstance(approach, str):
        steps.insert(0, _build_approach_step(approach))

    response_obj = {
        "sections": [{
            "section_title": custom_title or "הפתרון",
            "steps": steps,
            "section_result": str(final_answer)  # V262.0: Per-section result
        }],
        "final_answer": str(final_answer),
        "teacher_closing": meta.get("teacher_closing", "כל הכבוד! 🎉"),
        "approach": approach,  # V8.6: Explicit approach field
        "teacher_summary": meta.get("teacher_summary")  # V262.2: Propagate explicit summary
    }
    _attach_investigation(response_obj, meta)
    return response_obj


def _demo() -> None:
    """Manual smoke test: build and print a response for a circle-equation payload."""
    import json

    # Test circle equation
    llm_out = {
        "equation": "(x-3)^2 + (y-5)^2 = 25",
        "center": [3, 5],
        "radius": 5
    }
    data = {"center": "(3,5)", "radius": 5}
    response = build_pedagogical_response("CIRCLE_EQUATION", llm_out, data)
    print(json.dumps(response, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    _demo()