BuddyMath / pedagogical_builder.py
dotandru's picture
Fix: Clean production deployment with sse-starlette
9d29c62
raw
history blame
37.4 kB
# pedagogical_builder.py - V231.18
# Server-side pedagogical templates for BuddyMath
# Adds detailed explanations WITHOUT using LLM tokens!
"""
Pedagogical Builder for BuddyMath
Follows Iron Law #4: CHILD-FRIENDLY ALWAYS
LLM returns ONLY mathematical core (100-200 tokens).
Server adds full pedagogical wrapper (0 tokens!).
"""
import re
from domain.processing_strategy import ProcessingStrategy
class LLMSchemaError(Exception):
    """Raised when LLM output fails validation (schema, density or merge checks)."""
# validate_narrative_density (Unified at line 362)
# ==================== PEDAGOGICAL TEMPLATES ====================
# Topic id -> static lesson skeleton consumed by _build_template_response.
# Each entry has:
#   "intro"   - dict with "title", "content" and an optional "tip"; rendered as step 0.
#   "steps"   - ordered list of step specs. Every spec has a "title" and may carry:
#                 "content"    - fixed text used verbatim,
#                 "template"   - str.format() template filled from data_anchor + LLM output,
#                 "block_math" - fixed LaTeX for the step,
#                 "uses_llm"   - key of the LLM output to inject ("steps" expands the LLM
#                                step list in place of this spec),
#                 "tip"        - teacher tip attached to the step.
#   "closing" - closing sentence returned as "teacher_closing".
PEDAGOGICAL_TEMPLATES = {
    # ========== GEOMETRY ==========
    "CIRCLE_EQUATION": {
        "intro": {
            "title": "מקום גיאומטרי או משוואת מעגל",
            "content": "בואו נפתור צעד אחר צעד ונשתמש במשוואת המעגל והמרחק לפי הצורך.",
            "tip": "חשוב להבין את המשמעות הגיאומטרית של הנתונים"
        },
        "steps": [
            {
                "title": "מה נתון לנו?",
                "uses_llm": "approach",
                "tip": "תמיד מסדרים את הנתונים קודם"
            },
            {
                "title": "איך מחשבים מרחק?",
                # NOTE(review): the doubled braces look like leftover str.format escaping;
                # "content" values are used verbatim, so they reach the renderer as-is.
                "content": "נזכיר את נוסחת המרחק: המרחק מנקודה $(x,y)$ לנקודה $(a,b)$ הוא $d = \\sqrt{{(x-a)^2 + (y-b)^2}}$",
                "block_math": "d = \\sqrt{(x-a)^2 + (y-b)^2}",
                "tip": "זו נוסחת פיתגורס במסווה!"
            },
            {
                "title": "חישוב הפתרון",
                "uses_llm": "steps"
            },
            {
                "title": "התשובה הסופית",
                "content": "קיבלנו את התשובה הסופית!",
                "uses_llm": "solution"
            }
        ],
        "closing": "כל הכבוד! הצלחנו לפתור את השאלה המורכבת הזו! 🎉"
    },
    "DERIVATIVE_QUOTIENT": {
        "intro": {
            "title": "נגזרת של מנה",
            "content": "כשיש לנו פונקציה שהיא מנה (חילוק) של שתי פונקציות, נשתמש בכלל המנה.",
            "tip": "כלל המנה: $(\\frac{u}{v})' = \\frac{u'v - uv'}{v^2}$"
        },
        "steps": [
            {
                "title": "זיהוי המונה והמכנה",
                # Expects "numerator" and "denominator" among the format arguments.
                "template": "המונה: $u = {numerator}$\nהמכנה: $v = {denominator}$"
            },
            {
                "title": "נגזרת המונה",
                "content": "נמצא את $u'$ (נגזרת המונה):",
                "uses_llm": "u_prime"
            },
            {
                "title": "נגזרת המכנה",
                "content": "נמצא את $v'$ (נגזרת המכנה):",
                "uses_llm": "v_prime"
            },
            {
                "title": "נציב בכלל המנה",
                "content": "עכשיו נציב בנוסחה: $(\\frac{u}{v})' = \\frac{u'v - uv'}{v^2}$",
                "uses_llm": "derivative",
                "tip": "שים לב לסדר: u'v **פחות** uv'"
            }
        ],
        "closing": "מעולה! שלטת בכלל המנה! 💪"
    },
    "LINEAR_EQUATION": {
        "intro": {
            "title": "פתרון משוואה לינארית",
            "content": "משוואה לינארית היא משוואה שבה המשתנה מופיע בחזקה 1 בלבד. המטרה: לבודד את x.",
            "tip": "כלל הזהב: מה שעושים בצד אחד, עושים גם בצד השני!"
        },
        "steps": [
            {
                "title": "המשוואה שלנו",
                "template": "נתון: ${equation}$"
            },
            {
                "title": "פתרון שלב אחר שלב",
                "uses_llm": "steps",
                "tip": "בכל שלב, נקרב את x לבידוד"
            },
            {
                "title": "התשובה",
                "template": "הפתרון: $x = {solution}$"
            }
        ],
        "closing": "יפה! פתרת את המשוואה! ✅"
    },
    # ========== GENERAL / ALGEBRA ==========
    "GENERAL": {
        "intro": {
            "title": "ניתוח השאלה",
            "content": "בואו נראה מה נתון לנו ומה צריך למצוא. נפרק את הבעיה לשלבים פשוטים.",
            "tip": "קריאה נכונה של השאלה היא 50% מהפתרון!"
        },
        "steps": [
            {
                "title": "מה נתון?",
                "content": "נסדר את הנתונים והמשוואות בצורה ברורה.",
                "uses_llm": "approach"  # Use approach/strategy as step 1
            },
            {
                "title": "דרך הפתרון",
                "uses_llm": "steps",
                "tip": "נפתור שלב אחר שלב בצורה מסודרת"
            },
            {
                "title": "תשובה סופית",
                "template": "הגענו לתוצאה: {solution}",
                "uses_llm": "solution"
            }
        ],
        "closing": "מצוין! סיימנו את הסעיף הזה בהצלחה."
    },
    # Alias for Rational Function (uses General structure but refined)
    "RATIONAL_FUNCTION": {
        "intro": {
            "title": "חקירת פונקציה רציונלית",
            "content": "פונקציה רציונלית היא מנה של פולינומים. נבדוק תחום הגדרה, אסימפטוטות ונקודות מיוחדות.",
            "tip": "חשוב לבדוק מתי המכנה מתאפס!"
        },
        "steps": [
            {
                "title": "ניתוח הפונקציה",
                "content": "נסתכל על המונה והמכנה ונראה אם אפשר לפשט.",
                "uses_llm": "approach"
            },
            {
                "title": "הפתרון המלא",
                "uses_llm": "steps",
                "tip": "עבודה מסודרת מונעת טעויות חישוב"
            },
            {
                "title": "סיכום",
                "uses_llm": "solution"
            }
        ],
        "closing": "כל הכבוד! חקירה יסודית היא המפתח."
    },
    # ========== TRIGONOMETRY ==========
    "TRIGONOMETRY": {
        "intro": {
            "title": "חשבון טריגונומטרי",
            "content": "נשתמש בזהויות טריגונומטריות ובתכונות המשולש כדי לפתור.",
            "tip": "זכור: sin²x + cos²x = 1"
        },
        "steps": [
            {
                "title": "זיהוי המצב",
                "content": "נבדוק אילו זווית וצלעות נתונות לנו.",
                "uses_llm": "approach"
            },
            {
                "title": "ביצוע החישוב",
                "uses_llm": "steps",
                "tip": "שימו לב יחידות מעלות/רדיאנים!"
            },
            {
                "title": "התשובה",
                "template": "התוצאה: {solution}",
                "uses_llm": "solution"
            }
        ],
        "closing": "מצוין! הטריגונומטריה בידינו! 📐"
    },
    # Alias for basic trig (minimal skeleton: no intro tip, two steps only)
    "TRIG_BASIC": {
        "intro": { "title": "טריגונומטריה בסיסית", "content": "חישוב זוויות וצלעות במשולש ישר זווית." },
        "steps": [{"title": "פתרון", "uses_llm": "steps"}, {"title": "תשובה", "uses_llm": "solution"}],
        "closing": "יופי!"
    }
}
def build_pedagogical_response(
    topic_id: str,
    llm_output: dict,
    data_anchor: dict,
    custom_title: str = None,  # V260.3: Allow override
    proof_graph=None,  # V1.1: Immutable ProofGraph
    processing_strategy: ProcessingStrategy = None  # V5.8.0: Intent Contract
) -> dict:
    """
    V4.2 (Behavioral Firewall): Projection-Only Builder.

    Projects the steps of a ProofGraph into the UI response and uses the LLM
    output only for the narrative text attached to each step. When no usable
    ProofGraph is supplied the LLM output is rendered by
    ``_build_generic_response`` instead.

    Args:
        topic_id: Topic identifier; only used for the debug print here.
        llm_output: LLM payload. Either a dict (optionally carrying
            "steps_explanations", "steps", "sections", "approach",
            "teacher_summary"/"summary", "teacher_closing",
            "investigation"/"investigation_table") or a bare list of step dicts.
        data_anchor: Accepted for interface compatibility; not read here.
        custom_title: Optional section title override.
        proof_graph: Object exposing ``steps``; each step is read through
            ``step_id``, ``math_content`` and ``logic_description``.
        processing_strategy: Strategy contract; STRICT_SYMBOLIC forbids running
            without a ProofGraph.

    Returns:
        Response dict with "sections", "final_answer", "teacher_closing",
        "approach", "teacher_summary" and, when present, "investigation".

    Raises:
        LLMSchemaError: STRICT_SYMBOLIC was requested without a ProofGraph, or an
            explanation was rejected by the narrative kill switch / merge phase.
            These are deliberate validation failures and are not replaced by the
            generic fallback (same policy as ``merge_and_verify_explanations``).
    """
    # No module-level ``logger`` is visible in this file, so bind one locally;
    # otherwise both the warning path and the fallback handler raise NameError.
    import logging
    logger = logging.getLogger(__name__)

    try:
        print(f"🧱 [V4.2] Projection-Only Mode: topic={topic_id}, ProofGraph={proof_graph is not None}")
        print(f"DEBUG [PRE-SCRUB]: LLM generated raw narrative: {llm_output}")

        if not proof_graph or not proof_graph.steps:
            # V5.8.0: Enforce Intent Matrix! STRICT_SYMBOLIC without a graph is fatal.
            if processing_strategy == ProcessingStrategy.STRICT_SYMBOLIC:
                print("🛑 [V5.8.0] STRICT_SYMBOLIC Violation: No ProofGraph provided. Blocking response.")
                raise LLMSchemaError("Truth Authority Violation: STRICT_SYMBOLIC strategy requires a verified ProofGraph.")

            # Every remaining case renders the LLM output generically; the branches
            # only differ in what gets reported.
            if processing_strategy == ProcessingStrategy.HEURISTIC_DEDUCTION:
                print("✅ [V7.3] HEURISTIC_DEDUCTION detected. Bypassing Truth Authority.")
            elif isinstance(llm_output, list) and len(llm_output) > 0:
                print("✅ [V7.3] Hybrid Navigation detected (List Segment). Bypassing ProofGraph requirement.")
            elif not (
                isinstance(llm_output, dict)
                and any(key in llm_output for key in ("solution_markdown", "steps", "chain_of_thought", "sections"))
            ):
                logger.warning(f"⚠️ [V8.5] Truth Authority Violation: Falling back to generic due to invalid structure: {llm_output}")
            return _build_generic_response(llm_output, custom_title=custom_title)

        # A list payload has no metadata keys; read those from an empty dict instead
        # of calling .get() on the list.
        meta = llm_output if isinstance(llm_output, dict) else {}

        # 1. Map ProofGraph to Immutable Truth Nodes
        sympy_nodes = [
            {
                "step_id": step.step_id,
                "block_math": step.math_content,
                "title": step.logic_description or f"שלב {step.step_id}",
            }
            for step in proof_graph.steps
        ]

        # 2. Extract explanations from LLM (The "Skin") - V4.2.7 supports list or dict.
        #    Work on copies so the caller's payload is never mutated.
        if isinstance(llm_output, list):
            raw_explanations = list(llm_output)
        else:
            candidate = meta.get("steps_explanations", meta.get("steps", []))
            raw_explanations = list(candidate) if isinstance(candidate, list) else []
            # V5.8.2: Support parsing nested 'sections' from the LLM output
            if not raw_explanations and isinstance(meta.get("sections"), list):
                for section in meta["sections"]:
                    if isinstance(section, dict) and isinstance(section.get("steps"), list):
                        raw_explanations.extend(section["steps"])

        # V276.1: Normalize explanations to handle structured content/type keys
        llm_explanations = []
        for raw_node in raw_explanations:
            if not isinstance(raw_node, dict):
                continue  # cannot carry a step_id; the merge phase supplies generic text
            node = dict(raw_node)
            text = node.get("explanation_text")
            if not text:
                text = node.get("content_mixed", node.get("content", node.get("explanation", "")))
            if isinstance(text, dict):
                text = text.get("content", text.get("text", str(text)))
            if text is None:
                text = ""
            elif not isinstance(text, str):
                text = str(text)
            node["explanation_text"] = text
            llm_explanations.append(node)

        if not llm_explanations:
            # Internal Fallback: If LLM failed, use generic text to preserve UI
            llm_explanations = [
                {"step_id": node["step_id"], "explanation_text": "נבצע את החישוב המתמטי"}
                for node in sympy_nodes
            ]

        # V5.8.2: Layer 2 Runtime Validator (The Kill Switch)
        for node in llm_explanations:
            text = node.get("explanation_text", "")
            if not validate_narrative_density(text):
                print(f"🛑 [V5.8.2] KILL SWITCH TRIGGERED on text: {text}")
                raise LLMSchemaError("NARRATIVE_OVERFLOW: Explanation is too dense or contains forbidden math/English characters.")

        # 3. Deterministic Merge (Iron Law) - V4.2.7: explanation_text
        merged_steps = merge_and_verify_explanations(sympy_nodes, llm_explanations)

        # 5. UI Projection (Hard Decoupling V4.2.10)
        ui_steps = []
        for position, node in enumerate(merged_steps, start=1):
            explanation = sanitize_math_text(node["explanation_text"])
            math_content = node["block_math"]
            ui_steps.append({
                "step_id": node["step_id"],
                "step_number": position,
                "explanation_text": explanation,
                "math_artifact": {
                    "type": "equation",
                    "latex": math_content,
                    "table_data": ""
                },
                # 'Ghost Keys' kept for backward compatibility; they mirror the
                # structured fields above.
                "content_mixed": explanation,
                "block_math": math_content
            })

        # V8.6: Inject 'approach' as Step 0 to ensure Flutter displays it
        approach = meta.get("approach")
        if approach and isinstance(approach, str):
            approach_text = sanitize_math_text(approach)
            ui_steps.insert(0, {
                "step_id": 0,
                "step_number": 0,
                "title": "איך ניגשים לזה? 🧭",
                "explanation_text": approach_text,
                "content_mixed": approach_text,
                "math_artifact": {"type": "equation", "latex": ""},
                "block_math": ""
            })

        # V8.6.2: Final check on teacher_summary from LLM
        summary = meta.get("teacher_summary") or meta.get("summary")
        final_math = merged_steps[-1]["block_math"] if merged_steps else ""
        response = {
            "sections": [{
                "section_title": custom_title or "פתרון מלא ומדויק",
                "steps": ui_steps,
                "section_result": final_math
            }],
            "final_answer": final_math,
            "teacher_closing": meta.get("teacher_closing", "כל הכבוד על פתרון התרגיל! 🎉"),
            "approach": approach,
            "teacher_summary": summary
        }

        # V260.5: Propagate Investigation Data (Crucial for Table UI)
        if "investigation" in meta:
            response["investigation"] = meta["investigation"]
        elif "investigation_table" in meta:
            response["investigation"] = meta["investigation_table"]

        return apply_cognitive_load_limiter(response)
    except LLMSchemaError:
        # Deliberate validation failure: let the caller decide, do not mask it.
        raise
    except Exception as e:
        logger.error(f"🚨 [V8.5 RESILIENCE] Builder Crash: {e}. Falling back to generic.")
        return _build_generic_response(llm_output, custom_title=custom_title)
def apply_cognitive_load_limiter(response: dict) -> dict:
    """
    V1.1: Cognitive Load Limiter.

    Tags every step of every section with a "disclosure_state". Steps are
    counted across all sections in order: the first three are "VISIBLE" and
    every later one is "HIDDEN". The response is modified in place and returned;
    a response without a "sections" key is returned untouched.
    """
    if "sections" not in response:
        return response

    visible_budget = 3
    # Flatten lazily so the running position spans section boundaries.
    ordered_steps = (
        step
        for section in response["sections"]
        if "steps" in section
        for step in section["steps"]
    )
    for position, step in enumerate(ordered_steps, start=1):
        step["disclosure_state"] = "VISIBLE" if position <= visible_budget else "HIDDEN"
    return response
def validate_narrative_density(text: str) -> bool:
    """
    V5.8.2: Layer 2 Runtime Validator (Kill Switch).

    V8.5: RESILIENCE - English letters and math symbols are tolerated.
    Returns False only when the text is longer than 400 characters (runaway
    LLM) or contains one of the whole words import / def / class / lambda;
    returns True otherwise.
    """
    # The length test comes first and short-circuits the keyword scan.
    return len(text) <= 400 and re.search(r'\b(import|def|class|lambda)\b', text) is None
def merge_and_verify_explanations(sympy_nodes: list[dict], llm_explanations: list[dict]) -> list[dict]:
    """
    V2.5.3: The Swiss Watch Maneuver.
    V3.1.3: Hardened Merge Phase with robust guards.
    V5.8.2: Robust Merge (Option 1) to ignore LLM self-referencing narrative drift.

    Attaches an "explanation_text" to every truth node. For each node, the
    LAST LLM entry with the same "step_id" that carries "explanation_text"
    wins; a node with no such entry gets a generic Hebrew sentence.

    Returns:
        New list of dicts, one per truth node, each the node's own fields plus
        "explanation_text".

    Raises:
        LLMSchemaError: "NARRATIVE_OVERFLOW" when a chosen explanation fails
            validate_narrative_density, or "Merge failure: ..." wrapping any
            other error raised during the merge.
    """
    import logging
    import re

    log = logging.getLogger(__name__)
    merged = []  # V3.1.3: Mandatory initialization
    try:
        for truth in sympy_nodes:
            step_key = truth["step_id"]
            matching = [
                entry for entry in llm_explanations
                if entry.get("step_id") == step_key and "explanation_text" in entry
            ]

            if matching:
                # Last match wins, which skips preamble/meta-commentary drift.
                narrative = matching[-1]["explanation_text"]

                # V6 Narrative Drift Telemetry (logs only, never rejects)
                concepts = truth.get("allowed_concepts")
                if concepts:
                    # Words of 1-2 characters are ignored as short connectives.
                    tokens = [token for token in re.split(r'\s+', narrative) if len(token) > 2]
                    vocabulary = set()
                    for concept in concepts:
                        vocabulary.update(concept.split())
                    strays = [token for token in tokens if token not in vocabulary]
                    drift = (len(strays) / max(len(tokens), 1)) * 100
                    if drift > 50.0:
                        log.warning(f"⚠️ [V6 TELEMETRY] Drift Warning: {drift:.1f}% concept drift in Step {step_key} (unauthorized: {strays[:3]}...)")

                # V5.8.2 Kill Switch Validator Call
                if not validate_narrative_density(narrative):
                    msg = f"NARRATIVE_OVERFLOW: Explanation rejected by Kill Switch: {narrative[:20]}..."
                    print(f"🚨 [V5.8.2] {msg}")
                    raise LLMSchemaError("NARRATIVE_OVERFLOW")
            else:
                # The LLM skipped this step entirely: fall back to generic text.
                narrative = "נבצע את החישוב המתמטי."

            merged.append({**truth, "explanation_text": narrative})
        return merged
    except LLMSchemaError:
        raise
    except Exception as e:
        log.error(f"🚨 [V3.1.3] Merge Phase Failed: {e}")
        # Surface the failure to the caller as a schema error.
        raise LLMSchemaError(f"Merge failure: {str(e)}")
def _normalize_llm_keys(llm_output: dict) -> dict:
"""V275.3: Map alternative LLM output keys to expected template keys.
E.g., CIRCLE_EQUATION returns 'equation' but GENERAL template expects 'solution'."""
result = dict(llm_output)
# Map equation -> solution if solution is missing
if "solution" not in result and "equation" in result:
result["solution"] = result["equation"]
# Map approach alternatives
if "approach" not in result and "strategy" in result:
result["approach"] = result["strategy"]
return result
def _build_template_response(topic_id: str, llm_output: dict, data_anchor: dict) -> dict:
    """Build a single-section response from the topic's entry in PEDAGOGICAL_TEMPLATES.

    The template's intro becomes step 0, each step spec is rendered in order
    (a spec with ``uses_llm == "steps"`` is replaced by the LLM's own step
    list), and the template's closing becomes "teacher_closing".

    Raises:
        KeyError: ``topic_id`` has no template.
    """
    template = PEDAGOGICAL_TEMPLATES[topic_id]
    # V275.3: Normalize LLM keys so templates always find what they need
    llm_output = _normalize_llm_keys(llm_output)

    rendered = []  # shared with the section dict below, filled as we go
    # V262.0: Per-section result, first non-empty of equation / solution / derivative
    section_result = llm_output.get("equation") or llm_output.get("solution") or llm_output.get("derivative")
    response = {
        "sections": [{
            "section_title": "פתרון מלא",
            "steps": rendered,
            "section_result": section_result,
        }],
        "final_answer": section_result,
        "teacher_closing": template.get("closing", "כל הכבוד! 🎉"),
        "teacher_summary": llm_output.get("teacher_summary"),  # V262.2: Propagate explicit summary
    }

    if "intro" in template:
        intro = template["intro"]
        rendered.append({
            "step_number": 0,
            "title": intro["title"],
            "content_mixed": intro["content"],
            "teacher_tip": intro.get("tip", ""),
        })

    # NOTE(review): numbers derive from len(rendered) + 1, so with an intro present
    # the sequence runs 0, 2, 3, ... - confirm whether skipping 1 is intended.
    for spec in template["steps"]:
        # V275.2: a "steps" spec expands into one rendered step per LLM step.
        if spec.get("uses_llm") == "steps" and isinstance(llm_output.get("steps"), list):
            for llm_step in llm_output["steps"]:
                ordinal = len(rendered) + 1
                # V275.3: different micro-prompts name the body key differently
                body = llm_step.get("content_mixed", llm_step.get("content", llm_step.get("explanation", "")))
                rendered.append({
                    "step_number": ordinal,
                    "title": llm_step.get("title", f"שלב {ordinal}"),
                    "content_mixed": sanitize_math_text(body),
                    "block_math": llm_step.get("block_math", llm_step.get("result", "")),
                    "teacher_tip": llm_step.get("teacher_tip", spec.get("tip")),
                })
            continue

        step = {"step_number": len(rendered) + 1, "title": spec["title"]}

        if "template" in spec:
            try:
                # LLM values override data_anchor values on key clashes.
                filled = spec["template"].format(**{**data_anchor, **llm_output})
            except KeyError:
                # V275.3: Strip unresolved {variable} placeholders instead of showing them raw
                filled = re.sub(r'\{\w+\}', '', spec["template"]).strip()
            step["content_mixed"] = filled
        if "content" in spec:
            # Fixed content wins over a formatted template when both exist.
            step["content_mixed"] = spec["content"]
        if "block_math" in spec:
            step["block_math"] = spec["block_math"]

        if "uses_llm" in spec:
            llm_key = spec["uses_llm"]
            if llm_key in llm_output:
                cleaned = sanitize_math_text(str(llm_output[llm_key]))
                if llm_key in ("solution", "approach"):
                    # These usually contain Hebrew, so they go into the text body
                    # (appended after any existing content), not into block_math.
                    if step.get("content_mixed"):
                        step["content_mixed"] += "\n" + cleaned
                    else:
                        step["content_mixed"] = cleaned
                else:
                    step["block_math"] = cleaned

        if "tip" in spec:
            step["teacher_tip"] = spec["tip"]
        rendered.append(step)

    return response
import gibberish_detector # V231.25: Fix gibberish!
def sanitize_math_text(text: str) -> str:
    """
    V231.23: Remove English math artifacts and enforce Hebrew/Latex conventions.

    Pipeline, in order:
      1. gibberish_detector.auto_fix_gibberish (V260.4).
      2. Collapse runs of three or more '$' into '$$'.
      3. Unwrap $$...$$ blocks that are really Hebrew prose.
      4. Repair corrupted escapes (form feed -> '\\f', backslash-digit -> digit).
      5. Split '\\\\' / '\\newline' line breaks inside $$...$$ into separate blocks
         (blocks containing a \\begin{...} environment are left alone).
      6. Force 'Angle' -> '\\angle', 'Triangle' -> '\\triangle',
         'Deg' -> '^{\\circ}', 'Area' -> 'S' (whole words, case-insensitive).
      7. Wrap Hebrew found inside math in \\text{...}.

    Falsy input (None, "") is returned unchanged.
    """
    if not text:
        return text

    # V260.4: First, fix structural gibberish (reversed Hebrew, broken LaTeX)
    text = gibberish_detector.auto_fix_gibberish(text)

    # V275.3: Fix quadruple dollars $$$$ -> $$ EARLY (before any block processing)
    text = re.sub(r'\${3,}', '$$', text)

    # V275.4: Detect and unwrap $$Hebrew paragraph$$ blocks, which otherwise render
    # as garbled "mirror text". Hebrew mixed with g(x), \ln(x) etc. has a low Hebrew
    # ratio, hence the low threshold plus a consecutive-words heuristic.
    def _unwrap_hebrew_math_block(match):
        content = match.group(1).strip()
        hebrew_chars = len(re.findall(r'[\u0590-\u05FF]', content))
        total_chars = len(content.replace(' ', ''))
        if total_chars == 0:
            return ''  # Empty block, remove it
        hebrew_ratio = hebrew_chars / total_chars
        # Heuristic 1: >25% Hebrew with enough chars = text paragraph
        if hebrew_ratio > 0.25 and hebrew_chars > 8:
            print(f"🧹 [SANITIZE] Unwrapped Hebrew-in-math block ({hebrew_chars} Hebrew chars, ratio={hebrew_ratio:.1%})")
            return content  # Return as plain text without $$
        # Heuristic 2: Has 3+ consecutive Hebrew words (even if ratio is low)
        if re.search(r'[\u0590-\u05FF]+\s+[\u0590-\u05FF]+\s+[\u0590-\u05FF]+', content):
            print("🧹 [SANITIZE] Unwrapped Hebrew-in-math (consecutive Hebrew words detected)")
            return content
        return match.group(0)  # Keep as-is for real math

    text = re.sub(r'\$\$(.+?)\$\$', _unwrap_hebrew_math_block, text, flags=re.DOTALL)

    # V231.25: Fix corrupted LaTeX escapes (form feed \f, invalid \3)
    text = text.replace('\x0c', r'\f')
    text = re.sub(r'\\(\d)', r'\1', text)

    # V275.2: Double-backslash newlines inside $$...$$ crash flutter_math_fork,
    # so each line becomes its own $$...$$ block.
    def safe_split_newlines(match):
        block = match.group(1)
        if r'\begin{' in block:
            # Leave environments like \begin{cases} alone, they support \\
            return match.group(0)
        parts = re.split(r'\\\\|\\newline', block)
        # V280.3: keep the outer $$ markers when splitting blocks
        joined = '$$\n$$'.join(p.strip() for p in parts if p.strip())
        return f"$${joined}$$" if joined else ""

    text = re.sub(r'\$\$(.+?)\$\$', safe_split_newlines, text, flags=re.DOTALL)

    # 1. English geometrical terms (case-insensitive, whole words only).
    #    In a raw string the word boundary is a single-backslash \b; the previous
    #    doubled form matched a literal backslash + "b" and therefore never fired.
    #    The (?<!\\) lookbehind keeps existing commands (\angle, \triangle, \deg)
    #    from being escaped a second time.
    text = re.sub(r'(?<!\\)\bAngle\b', r'\\angle', text, flags=re.IGNORECASE)
    text = re.sub(r'(?<!\\)\bTriangle\b', r'\\triangle', text, flags=re.IGNORECASE)
    text = re.sub(r'(?<!\\)\bDeg\b', r'^{\\circ}', text, flags=re.IGNORECASE)

    # 2. "Area" -> S (e.g. "Area of triangle" -> "S of triangle")
    text = re.sub(r'(?<!\\)\bArea\b', r'S', text, flags=re.IGNORECASE)

    # V262.1: Auto-wrap Hebrew inside LaTeX blocks (The "Escaping Lines" Fix)
    text = _auto_wrap_hebrew_in_latex(text)

    return text
def _auto_wrap_hebrew_in_latex(text: str) -> str:
"""
Scans for Hebrew characters inside $$...$$ or $...$ blocks.
If found, wraps them in \\text{...} to prevent rendering crashes.
"""
if not text: return text
# Regex for Hebrew chars (including nikud/punctuation common in Hebrew)
hebrew_pattern = r'([\u0590-\u05FF\s\.\,\:\-]+)'
def replacer(match):
content = match.group(1) # The content inside the dollars
# Check if there is Hebrew in this block
if re.search(r'[\u0590-\u05FF]', content):
# There is Hebrew! Let's wrap the Hebrew parts in \text{...}
# We split by math/hebrew chunks or just wrap the whole Hebrew phrase
# Simple approach: Find Hebrew chunks and wrap them
new_content = re.sub(hebrew_pattern, r'\\text{\1}', content)
# Cleanup: \text{ } (empty) or double wrapping check could be added if needed
return f"${new_content}$"
return match.group(0)
# Replace inline math $...$ (using naive non-nested check)
# We use a trick to avoid matching $$...$$ first if we aren't careful,
# but specific regex for $$...$$ should come first if we supported it fully as separate.
# For now, let's handle $...$ which often covers $$...$$ in simple regex unless distinct.
# Actually, $$ is just two $s. Let's try to be safe.
# Strategy: Split by '$' and process every odd element (1, 3, 5...) as Math?
# This is safer than regex for nested/complex strings.
parts = text.split('$')
if len(parts) < 3: return text # No math blocks
new_parts = []
for i, part in enumerate(parts):
if i % 2 == 1: # This is a MATH block (inside $...$)
if re.search(r'[\u0590-\u05FF]', part):
# Found Hebrew inside Math! Wrap it.
# Note: We must be careful not to wrap existing \text{...} again if possible,
# but simple wrapping usually doesn't hurt: \text{\text{...}} is valid-ish or we can ignore.
# Better: only wrap Hebrew that is NOT already in \text{...}?
# That's complex. Let's do the simple "Wrap Hebrew Chars" regex.
# We exclude commands commands like \frac, \cdot etc.
def wrap_hebrew(m):
s = m.group(1)
if len(s.strip()) == 0: return s # Don't wrap just whitespace
if '\\text' in s: return s # Already wrapped (naive check)
return f"\\text{{{s}}}"
# Apply wrapping to identified hebrew chunks
# Note: we use a simplified version of the regex for local substitution
part = re.sub(hebrew_pattern, wrap_hebrew, part)
new_parts.append(part)
else:
# This is a REGULAR block (outside/between $...$)
new_parts.append(part)
return '$'.join(new_parts)
def _build_generic_response(llm_output: dict, custom_title: str = None) -> dict:
    """
    V231.20: Rich UI formatter - renders raw LLM output as a single-section response.

    Accepted shapes of ``llm_output``:
      * dict with "solution_markdown" -> one unified markdown step;
      * list of steps, or dict whose "steps" is a list -> one step per item;
      * dict with "chain_of_thought" -> one step (fallback for old models);
      * anything else -> one "last resort" step built from whatever text is found.

    Metadata (final answer, approach, closing, summary, investigation) is read
    only when ``llm_output`` is a dict; list or other input simply has none.

    Returns:
        Dict with "sections", "final_answer", "teacher_closing", "approach",
        "teacher_summary" and, when present, "investigation".
    """
    # Lists (and any other non-dict payload) carry no metadata keys; reading them
    # through an empty dict avoids AttributeError on .get().
    fields = llm_output if isinstance(llm_output, dict) else {}

    def get_content(s):
        """Pull the text body out of a step dict; non-dicts are stringified."""
        if isinstance(s, dict):
            # Different micro-prompts use different key names for the body.
            content = s.get("content_mixed", s.get("content", s.get("explanation", s.get("explanation_text", ""))))
            # V275.5: structured JSON fragments may nest the text one level down
            if isinstance(content, dict):
                content = content.get("text", content.get("content", str(content)))
            return content
        return str(s)

    def clean(value):
        """Sanitize strings; pass any other type through untouched."""
        return sanitize_math_text(value) if isinstance(value, str) else value

    steps = []
    if "solution_markdown" in fields:
        # V4.3 Unified Markdown Path
        markdown = sanitize_math_text(str(fields["solution_markdown"]))
        steps.append({
            "step_id": 1,
            "step_number": 1,
            "title": "פתרון מלא",
            "explanation_text": markdown,
            "content_mixed": markdown,
            "math_artifact": {"type": "equation", "latex": ""},
            "is_unified_markdown": True
        })
    elif isinstance(llm_output, list) or isinstance(fields.get("steps"), list):
        raw_steps = llm_output if isinstance(llm_output, list) else fields["steps"]
        for i, s in enumerate(raw_steps, 1):
            is_mapping = isinstance(s, dict)
            content = clean(get_content(s))
            math_latex = s.get("math_latex", s.get("block_math", s.get("result", ""))) if is_mapping else ""
            steps.append({
                "step_id": s.get("step_id", i) if is_mapping else i,
                "step_number": i,
                "title": s.get("title", f"שלב {i}") if is_mapping else f"שלב {i}",
                "explanation_text": content,
                "content_mixed": content,
                "math_artifact": {
                    "type": "equation",
                    "latex": math_latex
                },
                "block_math": math_latex,
                "teacher_tip": s.get("teacher_tip") if is_mapping else None
            })
    elif "chain_of_thought" in fields:
        # Fallback for old models
        cot = sanitize_math_text(str(fields["chain_of_thought"]))
        steps.append({
            "step_id": 1,
            "step_number": 1,
            "title": "דרך הפתרון",
            "explanation_text": cot,
            "content_mixed": cot,
            "math_artifact": {"type": "equation", "latex": ""}
        })
    else:
        # Last resort: treat the whole payload as a single block of content.
        content = clean(get_content(llm_output)) if llm_output is not None else ""
        steps.append({
            "step_id": 1,
            "step_number": 1,
            "title": "הפתרון",
            "explanation_text": content,
            "content_mixed": content,
            "math_artifact": {"type": "equation", "latex": ""}
        })

    # V261.16: final answer is the first non-empty value among these keys.
    answer_keys = (
        "final_answer", "equation", "solution", "derivative",
        "integral", "limit", "x_intercepts", "min_max_points",
    )
    final_answer = next((fields[key] for key in answer_keys if fields.get(key)), None)
    # Lists/dicts (e.g. points) are shown through their string representation.
    if isinstance(final_answer, (list, dict)):
        final_answer = str(final_answer)
    if not final_answer:
        final_answer = "ראה שלבים"

    # V8.6: Inject 'approach' as Step 0
    approach = fields.get("approach")
    if approach and isinstance(approach, str):
        approach_text = sanitize_math_text(approach)
        steps.insert(0, {
            "step_id": 0,
            "step_number": 0,
            "title": "איך ניגשים לזה? 🧭",
            "explanation_text": approach_text,
            "content_mixed": approach_text,
            "math_artifact": {"type": "equation", "latex": ""},
            "block_math": ""
        })

    response_obj = {
        "sections": [{
            "section_title": custom_title or "הפתרון",
            "steps": steps,
            "section_result": str(final_answer)  # V262.0: Per-section result
        }],
        "final_answer": str(final_answer),
        "teacher_closing": fields.get("teacher_closing", "כל הכבוד! 🎉"),
        "approach": approach,  # V8.6: Explicit approach field
        "teacher_summary": fields.get("teacher_summary")  # V262.2: Propagate explicit summary
    }

    # V260.5: Propagate Investigation Data (Crucial for Table UI)
    if "investigation" in fields:
        response_obj["investigation"] = fields["investigation"]
    elif "investigation_table" in fields:
        response_obj["investigation"] = fields["investigation_table"]

    return response_obj
if __name__ == "__main__":
    import json

    # Manual smoke run: circle-equation payload, no ProofGraph, result printed as JSON.
    demo_llm_output = dict(
        equation="(x-3)^2 + (y-5)^2 = 25",
        center=[3, 5],
        radius=5,
    )
    demo_anchor = dict(center="(3,5)", radius=5)
    demo_response = build_pedagogical_response("CIRCLE_EQUATION", demo_llm_output, demo_anchor)
    print(json.dumps(demo_response, indent=2, ensure_ascii=False))