Spaces:

dotandru
/

BuddyMath

Sleeping

App Files Files Community

dotandru committed on Mar 15

Commit

0c3327c

1 Parent(s): d0be315

V285.5: Multi-image ordering implementation and pedagogical grading logic

Browse files

Files changed (7) hide show

domain/math_validator.py +26 -3
firebase_manager.py +14 -15
main.py +4 -0
orchestrator.py +189 -44
prompts.py +110 -18
strategy_manager.py +2 -2
visuals.py +45 -22

domain/math_validator.py CHANGED Viewed

@@ -28,7 +28,9 @@ def _latex_to_sympy_str(latex_str: str) -> str:
     Best-effort LaTeX → SymPy-parseable string.
     V310.0: Aggressive Hebrew stripping and malformed notation cleanup.
     """
-    s = latex_str.strip()
     # 0. V310.0: Strip Hebrew characters and BOM/Zero-width chars immediately
     s = re.sub(r'[\u0590-\u05FF\u200B-\u200D\uFEFF]', ' ', s)
@@ -96,6 +98,11 @@ class MathPolygraph:
         2. parse_expr(evaluate=False): Prevent RCE and immediate evaluation.
         """
         try:
             # RCE Prevention: Extreme character whitelist before parsing
             # V280.0 FIX: Added ! for factorials and ensured strict match.
             safe_pattern = r'^[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\%\=]+$'
@@ -229,9 +236,13 @@ class MathPolygraph:
         return True, ""
     @staticmethod
-    async def validate_step_sequence(steps: List[dict]) -> Tuple[bool, str]:
         if not steps:
             return True, ""
         for step in steps:
             step_id = step.get('step_id', step.get('step_number', '?'))
             math_fields = []
@@ -241,6 +252,14 @@ class MathPolygraph:
                     math_fields.append(val.strip())
             if not math_fields:
                 continue
             ok, reason = await MathPolygraph._validate_single(math_fields[0], step_id)
             if not ok:
                 return False, reason
@@ -294,11 +313,15 @@ class MathPolygraph:
             return False
     @staticmethod
-    async def verify_algebraic_consistency(steps: List[dict]) -> Tuple[bool, str]:
         """
         V1.3: Checks if a sequence of steps is algebraically consistent.
         Currently checks if subsequent steps are equivalent (for simplifications).
         """
         math_steps = []
         for step in steps:
             math = step.get('math_latex') or step.get('block_math') or step.get('math')

     Best-effort LaTeX → SymPy-parseable string.
     V310.0: Aggressive Hebrew stripping and malformed notation cleanup.
     """
+    if latex_str is None:
+        return ""
+    s = str(latex_str).strip()
     # 0. V310.0: Strip Hebrew characters and BOM/Zero-width chars immediately
     s = re.sub(r'[\u0590-\u05FF\u200B-\u200D\uFEFF]', ' ', s)
         2. parse_expr(evaluate=False): Prevent RCE and immediate evaluation.
         """
         try:
+            # V317.8: Suppress SymPy Deprecation Warnings (e.g. non-Expr in Pow)
+            import warnings
+            from sympy.utilities.exceptions import SymPyDeprecationWarning
+            warnings.filterwarnings("ignore", category=SymPyDeprecationWarning)
             # RCE Prevention: Extreme character whitelist before parsing
             # V280.0 FIX: Added ! for factorials and ensured strict match.
             safe_pattern = r'^[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\%\=]+$'
         return True, ""
     @staticmethod
+    async def validate_step_sequence(steps: List[dict], topic: str = "GENERAL") -> Tuple[bool, str]:
         if not steps:
             return True, ""
+        # V8.9.4: Skip deep SymPy parsing for discrete sequence steps to avoid false-positive SyntaxErrors
+        is_sequence = topic and "SEQUENCE" in topic.upper()
         for step in steps:
             step_id = step.get('step_id', step.get('step_number', '?'))
             math_fields = []
                     math_fields.append(val.strip())
             if not math_fields:
                 continue
+            # If sequence, we only check if it's "valid-ish" LaTeX vs deep SymPy check
+            if is_sequence:
+                # Basic sanity check for LaTeX balance
+                if math_fields[0].count('{') != math_fields[0].count('}'):
+                    return False, f"LATEX_BRACKET_MISMATCH:step_{step_id}"
+                continue
             ok, reason = await MathPolygraph._validate_single(math_fields[0], step_id)
             if not ok:
                 return False, reason
             return False
     @staticmethod
+    async def verify_algebraic_consistency(steps: List[dict], topic: str = "GENERAL") -> Tuple[bool, str]:
         """
         V1.3: Checks if a sequence of steps is algebraically consistent.
         Currently checks if subsequent steps are equivalent (for simplifications).
         """
+        # V8.9.4: Skip deep SymPy parsing for discrete sequence steps
+        if topic and "SEQUENCE" in topic.upper():
+            return True, ""
         math_steps = []
         for step in steps:
             math = step.get('math_latex') or step.get('block_math') or step.get('math')

firebase_manager.py CHANGED Viewed

@@ -30,42 +30,41 @@ class FirebaseManager:
             return
         try:
-            from config import FIREBASE_CREDENTIALS_PATH, STORAGE_BUCKET, IS_PRODUCTION
             import json
             logger.info("🛠️ [FIREBASE] Starting initialization...")
-            # --- DEBUG: Print all relevant env var names ---
-            fb_vars = [v for v in os.environ.keys() if "FIREBASE" in v or "GOOGLE" in v]
-            logger.info(f"🔍 [FIREBASE] Detected environment variables: {fb_vars}")
             cred_dict = None
-            # Try loading from environment variable first (for HF Secrets)
-            env_creds = os.environ.get("FIREBASE_CREDENTIALS_JSON")
-            if env_creds and len(env_creds.strip()) > 10:
                 try:
-                    cred_dict = json.loads(env_creds)
-                    logger.info("✅ [FIREBASE] Successfully parsed credentials from FIREBASE_CREDENTIALS_JSON.")
                 except Exception as e:
-                    logger.error(f"❌ [FIREBASE] Failed to parse FIREBASE_CREDENTIALS_JSON: {e}")
-            # Fallback to file if environment variable not set or failed
             if not cred_dict:
                 if os.path.exists(FIREBASE_CREDENTIALS_PATH):
                     with open(FIREBASE_CREDENTIALS_PATH, "r", encoding="utf-8") as f:
                         cred_dict = json.load(f)
                     logger.info(f"📂 [FIREBASE] Loading credentials from file: {FIREBASE_CREDENTIALS_PATH}.")
                 else:
-                    logger.warning(f"⚠️ [FIREBASE] No credentials found at {FIREBASE_CREDENTIALS_PATH} and env var is empty.")
-                    # We don't return here, we try to see if it was already initialized by something else
             if cred_dict:
                 cred = credentials.Certificate(cred_dict)
                 firebase_admin.initialize_app(cred, {
                     'storageBucket': STORAGE_BUCKET
                 })
-                logger.info(f"🚀 [FIREBASE] SDK Initialized for {'PROD' if IS_PRODUCTION else 'DEV'}.")
             self._bucket = storage.bucket()
             self._db = firestore.client()

             return
         try:
+            from config import STORAGE_BUCKET, IS_PRODUCTION
             import json
             logger.info("🛠️ [FIREBASE] Starting initialization...")
             cred_dict = None
+            # Mission 1: Try loading from environment variables (checking multiple names for safety)
+            creds_str = os.environ.get("FIREBASE_CREDENTIALS") or os.environ.get("FIREBASE_CREDENTIALS_JSON")
+            if creds_str and len(creds_str.strip()) > 10:
                 try:
+                    cred_dict = json.loads(creds_str)
+                    logger.info("✅ [FIREBASE] Successfully parsed credentials from Environment Secrets!")
                 except Exception as e:
+                    logger.error(f"❌ [FIREBASE] Failed to parse Environment Credentials: {e}")
+            # Fallback to local file only for local development (if no secret is set)
             if not cred_dict:
+                from config import FIREBASE_CREDENTIALS_PATH
                 if os.path.exists(FIREBASE_CREDENTIALS_PATH):
                     with open(FIREBASE_CREDENTIALS_PATH, "r", encoding="utf-8") as f:
                         cred_dict = json.load(f)
                     logger.info(f"📂 [FIREBASE] Loading credentials from file: {FIREBASE_CREDENTIALS_PATH}.")
                 else:
+                    logger.warning(f"⚠️ [FIREBASE] Credentials not found in environment OR local file at {FIREBASE_CREDENTIALS_PATH}.")
             if cred_dict:
                 cred = credentials.Certificate(cred_dict)
                 firebase_admin.initialize_app(cred, {
                     'storageBucket': STORAGE_BUCKET
                 })
+                logger.info(f"🚀 [FIREBASE] SDK Initialized successfully for {'PROD' if IS_PRODUCTION else 'DEV'}.")
+            else:
+                logger.error("❌ [FIREBASE] CRITICAL ERROR: Firebase credentials not found! Firebase is OFFLINE.")
             self._bucket = storage.bucket()
             self._db = firestore.client()

main.py CHANGED Viewed

@@ -387,6 +387,7 @@ async def solve_stream(
     מקבל קובץ ישירות מהפלאטר ומפענח אותו עם OpenCV.
     """
     final_student_name = student_name or user or "תלמיד"
     print(f"🚀 🟢 BIT-LOG: Received Multipart request from {final_student_name}. Grade: {grade}")
     # Quota Check
@@ -535,6 +536,9 @@ async def solve_stream_v2(
     # Only increment usage if OCR/Solving process starts successfully
     try:
         # 1. קריאת הבינארי
         image_bytes_list = []
         for single_file in files:

     מקבל קובץ ישירות מהפלאטר ומפענח אותו עם OpenCV.
     """
     final_student_name = student_name or user or "תלמיד"
+    uid = None
     print(f"🚀 🟢 BIT-LOG: Received Multipart request from {final_student_name}. Grade: {grade}")
     # Quota Check
     # Only increment usage if OCR/Solving process starts successfully
     try:
+        # V316.5: Sort incoming files by filename to ensure image_00, image_01... order
+        files.sort(key=lambda x: x.filename)
         # 1. קריאת הבינארי
         image_bytes_list = []
         for single_file in files:

orchestrator.py CHANGED Viewed

@@ -19,8 +19,8 @@ from domain.schemas import BuddyEvent, BuddyState # V8.5: Streaming contract
 from firebase_manager import firebase_manager
 from config import IS_PRODUCTION, ENV, GEMINI_MODEL, CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
-# V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems)
-GLOBAL_TOKEN_LIMIT = 50000
 GLOBAL_TIMEOUT_SEC = 300
 # ==================== V7.2: TICKET 1 — AST ENRICHMENT HELPERS ====================
@@ -173,8 +173,64 @@ def validate_and_sanitize_response(resp_json, category="GENERAL"):
                     step["explanation_text"] = "הסבר לא זמין עקב חריגה מהחוזה הפדגוגי."
     resp_json["logic_error"] = resp_json.get("logic_error", False) or has_error
     return resp_json
 import asyncio
 async def safe_llm_call(generator_func, timeout_seconds=45.0):
@@ -772,12 +828,16 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
         """V231.14: Phase 1 - Extract specific values with validation and image support."""
         for attempt in range(1, 3):  # 2 attempts
             try:
                 prompt = prompts.get_data_extraction_prompt(problem_text)
-                # Build multimodal request if image is available
                 content = [prompt]
-                if image_data:
                     content.append({"mime_type": "image/png", "data": image_data})
                 res = await asyncio.wait_for(
                     self.model.generate_content_async(content),
@@ -788,8 +848,13 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                 if match:
                     data = safe_json_loads(match.group())
                     # V261.X: Guard against parse-failure sentinel being treated as valid data
-                    if data and data.get('logic_error') and data.get('error_type') == 'PARSING_FAILURE':
                         print(f"⚠️ [BIT-LOG] Data Anchor JSON parse failed (Attempt {attempt}/2) — skipping sentinel.")
                         continue
@@ -831,6 +896,8 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                 print(f"⚠️ [BIT-LOG] Data Anchor timeout (Attempt {attempt}/2)")
             except Exception as e:
                 print(f"⚠️ [BIT-LOG] Data Anchor error (Attempt {attempt}/2): {e}")
         print("🚨 [BIT-LOG] CRITICAL: Data Anchor extraction failed completely!")
@@ -1166,7 +1233,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
     # ===================== V285.0: CHECK ME (HOMEWORK VERIFICATION) =====================
-    async def _check_student_work(self, image_data: bytes, grade: str, student_name: str,
                                    student_gender: str = "M", question_id: str = "q_check"):
         """
         V285.0: Dedicated pipeline for the "Check Me" feature.
@@ -1185,8 +1252,9 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
         try:
             # V311.0: Data Slicing Guardrail
-            # First, transcribe and extract the "Absolute Truth" of the problem
-            print("📝 [CHECK-ME] Step 1.5: Extracting Problem Data (Data Slicing)...")
             problem_text = await self.transcribe_image(image_data)
             data_anchor = await self._extract_key_data(problem_text, image_data=image_data)
@@ -1198,13 +1266,15 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                 data_anchor=data_anchor
             )
-            print(f"📝 [CHECK-ME] Sending image ({len(image_data)} bytes) + check prompt to Vision LLM...")
             response = await asyncio.wait_for(
-                self.vision_model.generate_content_async([
-                    check_prompt,
-                    {"mime_type": "image/png", "data": image_data}
-                ]),
                 timeout=60.0
             )
@@ -1442,7 +1512,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
             yield BuddyEvent(
                 question_id=question_id,
                 state=BuddyState.STRATEGY_READY,
-                payload={"sections": [strategy_card]} if strategy_card else None
             )
             # V300.3: Smart Visual Triggers (Product Alignment)
@@ -1679,14 +1749,14 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                     llm_steps = llm_resp if isinstance(llm_resp, list) else llm_resp.get("steps", [])
                     # 2. השרת שולט: הפעלת ה-Polygraph על הצעדים של ה-LLM
-                    struct_ok, struct_reason = await MathPolygraph.validate_step_sequence(llm_steps)
                     poly_ok = struct_ok
                     poly_reason = struct_reason
                     if struct_ok:
                         # V1.3: Also verify algebraic consistency (e.g. A + B = C)
-                        alg_ok, alg_reason = await MathPolygraph.verify_algebraic_consistency(llm_steps)
                         if not alg_ok:
                             poly_ok = False
                             poly_reason = alg_reason
@@ -1718,13 +1788,9 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                                 is_degraded = True
                                 degraded_reason = "polygraph_fail_forbidden_words"
                                 # Continue to next attempt
-                            elif attempts < max_attempts:
-                                # V280.0: If it's the first attempt, we MUST retry once to get better LaTeX
-                                print(f"🔄 [ROBUSTNESS] SymPy Parse Error on attempt {attempts}. Triggering retry for better LaTeX.")
-                                # We don't break here, so it continues the loop
                             else:
-                                # V280.0 Soft Fail: After retry (or max attempts), if No Forbidden Words, we TRUST the LLM.
-                                print(f"🛡️ [SOFT FAIL] Persistent Parse Error but no forbidden words. Trusting LLM output for sub-q {sub_q['id']}.")
                                 is_degraded = True
                                 degraded_reason = "sympy_soft_fail"
                                 break # Exit the attempt loop
@@ -1763,18 +1829,17 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                         }
                 # 4. Packaging & Yielding
-                # V8.6.7 FIX: Only pass the final answer text forward to prevent massive JSON injection in future prompts
-                ans_text = solved_data.get("final_answer") if isinstance(solved_data, dict) else "הושלם"
-                context[f"result_{sub_q['id']}"] = ans_text
                 # AI Assessment Telemetry Extraction
                 if not assessment_sent and isinstance(solved_data, dict) and "assessment" in solved_data:
                     assessment_data = solved_data["assessment"]
                     if uid and assessment_data:
                         try:
-                            import asyncio
                             from analytics import analytics_manager
-                            asyncio.create_task(asyncio.to_thread(analytics_manager.update_weekly_analytics, uid, assessment_data))
                             print(f"📊 [ANALYTICS] Triggered background telemetry for {uid}")
                             assessment_sent = True
                         except Exception as e:
@@ -1907,13 +1972,14 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
             # V5.10.0: Save to History if Premium
             tier = kwargs.get('tier', 'student_basic')
-            uid = kwargs.get('uid') or uid  # Fallback to uid from line 1399
             print(f"🔍 [DEBUG HISTORY] UID: {uid}, Received Tier: '{tier}', kwargs keys: {list(kwargs.keys())}")
             is_premium = tier in ["premium", "admin", "admin_unlimited"]
             if is_premium and uid:
                 try:
-                    # Run history saving in background
-                    asyncio.create_task(self._save_exercise_history(uid, problem_text, all_solutions))
                     print(f"📚 [HISTORY] History saving scheduled for {uid}")
                 except Exception as e:
                     print(f"❌ [HISTORY] Failed to schedule history saving: {e}")
@@ -2028,15 +2094,19 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
         """
         V277.0: Main solve method with BINARY DATA SUPPORT.
         """
         image_data_list = kwargs.get('image_data_list')
         image_data = kwargs.get('image_data') or kwargs.get('image_bytes')
-        # V308.0: Dual support logic
         if image_data and not image_data_list:
             image_data_list = [image_data]
         elif image_data_list and not image_data:
             image_data = image_data_list[0]
         question_id = kwargs.get('question_id', f"q_{int(time.time())}")
         start_time = asyncio.get_event_loop().time()
         # GLOBAL_TIMEOUT_SEC = 240 # 4 minutes usually
@@ -2055,7 +2125,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
             print(f"📝 [V285.0] Mode=CHECK detected. Routing to _check_student_work()...")
             student_gender = kwargs.get('student_gender', 'M')
             async for event in self._check_student_work(
-                image_data=image_data,
                 grade=grade,
                 student_name=student_name,
                 student_gender=student_gender,
@@ -2065,9 +2135,17 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
             return
         # ===================== END CHECK ME ROUTING =====================
-        if image_data:
-            print(f"🔵 [BIT-LOG] Starting OCR Pipeline on Binary Data ({len(image_data)} bytes)")
-            problem_text = await self.transcribe_image(image_data)
         logger.info(f"🔎 [TRACE] RAW OCR TEXT: {problem_text}")
@@ -2127,7 +2205,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                 fast_result, _, _ = validate_and_fix_solution(fast_result)
                 # Quick Polygraph check
                 _poly_steps = collect_all_steps(fast_result)
-                _poly_ok, _ = MathPolygraph.validate_step_sequence(_poly_steps)
                 if _poly_ok:
                     yield BuddyEvent(
@@ -2142,8 +2220,19 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
         # ===================== FULL STREAMING PIPELINE =====================
         print(f"🎯 [BIT-LOG] Using Streaming Pipeline Strategy: {strategy.value}")
         data_anchor = await self._extract_key_data(problem_text, image_data=image_data) or {}
         # Iterate through the streaming smart_solve
         # V5.10.2: Remove keys already passed explicitly to avoid TypeError collision
         for _key in ['student_gender', 'image_data', 'image_data_list', 'image_bytes',
@@ -2520,11 +2609,66 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
         s = re.sub(r'\s+', ' ', s)
         return s.strip()
     async def _save_exercise_history(self, uid: str, question: str, solutions: list):
-        """V5.10.0: Saves exercise history to Firestore (Premium Only)."""
         try:
             db = firebase_manager.get_db()
-            if not db: return
             # Flatten solution steps into a single string
             solution_text_parts = []
@@ -2535,13 +2679,13 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                         title = section.get("section_title", "")
                         solution_text_parts.append(f"### {title}")
                         for step in section.get("steps", []):
-                            exp = step.get("explanation_text", "")
-                            math = step.get("math_artifact", {}).get("latex", "")
-                            if not math: math = step.get("block_math", "")
-                            solution_text_parts.append(exp)
                             if math:
-                                math = self._deep_sanitize_math(math)
                                 solution_text_parts.append(f"$${math}$$")
                 solution_text_parts.append("---")
@@ -2549,10 +2693,11 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
             from firebase_admin import firestore
             import datetime
-            # Save to history collection
             history_ref = db.collection('users').document(uid).collection('history').document()
             history_ref.set({
                 "original_question_text": question,
                 "solution_steps_text": full_solution,
                 "timestamp": firestore.SERVER_TIMESTAMP
             })

 from firebase_manager import firebase_manager
 from config import IS_PRODUCTION, ENV, GEMINI_MODEL, CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
+# V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems - V317.8)
+GLOBAL_TOKEN_LIMIT = 100000
 GLOBAL_TIMEOUT_SEC = 300
 # ==================== V7.2: TICKET 1 — AST ENRICHMENT HELPERS ====================
                     step["explanation_text"] = "הסבר לא זמין עקב חריגה מהחוזה הפדגוגי."
     resp_json["logic_error"] = resp_json.get("logic_error", False) or has_error
+    # V317.5: UI Sanitization Layer
+    if not resp_json.get("logic_error"):
+        resp_json = sanitize_llm_output(resp_json)
     return resp_json
def unify_data_anchor(raw_data):
    """V317.5: Smart Data Anchor Unification (prevents key overwrite).

    Normalizes a parsed data-anchor payload into a single dict.

    Args:
        raw_data: The parsed payload. A dict is returned unchanged; a list of
            dicts is merged key by key; anything else is treated as unusable.

    Returns:
        A dict. When the same key appears in several list items with
        different values, the values are collected into a list (duplicates
        are not added again). Non-dict list items are ignored. Returns ``{}``
        for any input that is neither a dict nor a list.

    Note:
        The input is never mutated: a list value that came from one of the
        input items is replaced by a new list when it has to grow, instead of
        being appended to in place.
    """
    if isinstance(raw_data, dict):
        return raw_data
    if not isinstance(raw_data, list):
        return {}  # Fallback

    unified = {}
    for item in raw_data:
        if not isinstance(item, dict):
            continue
        for key, value in item.items():
            if key not in unified:
                unified[key] = value
                continue
            # The key already exists: turn it into a list and add to it.
            existing = unified[key]
            if isinstance(existing, list):
                if value not in existing:
                    # Build a new list; `existing` may be the caller's object.
                    unified[key] = existing + [value]
            elif existing != value:
                unified[key] = [existing, value]
    return unified
def sanitize_llm_output(json_response):
    """V317.5: Cleans technical errors (SYMPY_PARSE_ERROR) and Hebrew from LaTeX.

    Works in place on the response dict and also returns it.

    For every dict entry in ``json_response["steps"]`` that has a non-empty
    string ``block_math``:

    * if it contains ``SYMPY_PARSE_ERROR``, the math block is cleared and a
      notice is appended to ``content_mixed``;
    * otherwise, if it contains Hebrew letters, the text (with ``\\text{``,
      ``}`` and ``$`` removed) is appended to ``content_mixed`` in square
      brackets and the math block is cleared.

    A ``final_answer`` whose string form contains ``SYMPY_PARSE_ERROR`` is
    replaced with a generic message.

    Args:
        json_response: Parsed LLM response. Non-dict values are returned
            untouched.

    Returns:
        The same object that was passed in.

    Note:
        The response shape comes from an LLM, so malformed pieces are skipped
        rather than raising: a missing or non-list ``steps``, non-dict steps,
        a non-string ``block_math``, and a ``content_mixed`` that is present
        but ``None``.
    """
    if not isinstance(json_response, dict):
        return json_response

    steps = json_response.get("steps")
    if isinstance(steps, (list, tuple)):
        for step in steps:
            if not isinstance(step, dict):
                continue
            block_math = step.get("block_math")
            if not block_math or not isinstance(block_math, str):
                continue

            # `.get(key, "")` would still return None for an explicit null.
            existing_text = step.get("content_mixed") or ""
            if not isinstance(existing_text, str):
                existing_text = str(existing_text)

            # Mission 2: detect SymPy errors
            if "SYMPY_PARSE_ERROR" in block_math:
                step["block_math"] = ""
                step["content_mixed"] = existing_text + "\n(המשוואה הוסתרה עקב קושי בתצוגה)."
            # Mission 2: detect Hebrew letters inside the LaTeX
            elif re.search(r'[א-ת]', block_math):
                # Move the content to the text field and delete the math block
                clean_math = block_math.replace('\\text{', '').replace('}', '').replace('$', '')
                step["content_mixed"] = existing_text + f"\n[{clean_math}]"
                step["block_math"] = ""

    # V280.0: Also check final_answer
    if "final_answer" in json_response and "SYMPY_PARSE_ERROR" in str(json_response["final_answer"]):
        json_response["final_answer"] = "התקבלה תשובה מורכבת (ראה שלבים מלאים)."
    return json_response
 import asyncio
 async def safe_llm_call(generator_func, timeout_seconds=45.0):
         """V231.14: Phase 1 - Extract specific values with validation and image support."""
         for attempt in range(1, 3):  # 2 attempts
             try:
+                print(f"⚓ [BIT-LOG] Data Anchor Extraction (Attempt {attempt}). Image Data: {type(image_data)} {len(image_data) if image_data else 'None'}")
                 prompt = prompts.get_data_extraction_prompt(problem_text)
+                if not prompt:
+                    prompt = f"Extract math data from this problem: {problem_text}"
                 content = [prompt]
+                if image_data and isinstance(image_data, bytes):
+                    # V316.9: Use canonical dict format for maximum SDK compatibility
                     content.append({"mime_type": "image/png", "data": image_data})
+                    print(f"📸 [BIT-LOG] Appended image part (size: {len(image_data)})")
                 res = await asyncio.wait_for(
                     self.model.generate_content_async(content),
                 if match:
                     data = safe_json_loads(match.group())
+                    # V317.5: Robust JSON Handling - Smart Unification
+                    data = unify_data_anchor(data)
+                    if isinstance(data, dict):
+                        print(f"⚓ [BIT-LOG] Unified Data Anchor: {json.dumps(data, ensure_ascii=False)[:100]}...")
                     # V261.X: Guard against parse-failure sentinel being treated as valid data
+                    if data and isinstance(data, dict) and data.get('logic_error') and data.get('error_type') == 'PARSING_FAILURE':
                         print(f"⚠️ [BIT-LOG] Data Anchor JSON parse failed (Attempt {attempt}/2) — skipping sentinel.")
                         continue
                 print(f"⚠️ [BIT-LOG] Data Anchor timeout (Attempt {attempt}/2)")
             except Exception as e:
                 print(f"⚠️ [BIT-LOG] Data Anchor error (Attempt {attempt}/2): {e}")
+                import traceback
+                traceback.print_exc()
         print("🚨 [BIT-LOG] CRITICAL: Data Anchor extraction failed completely!")
     # ===================== V285.0: CHECK ME (HOMEWORK VERIFICATION) =====================
+    async def _check_student_work(self, image_data_list: List[bytes], grade: str, student_name: str,
                                    student_gender: str = "M", question_id: str = "q_check"):
         """
         V285.0: Dedicated pipeline for the "Check Me" feature.
         try:
             # V311.0: Data Slicing Guardrail
+            # First, transcribe and extract the "Absolute Truth" of the problem from the FIRST image
+            image_data = image_data_list[0]
+            print("📝 [CHECK-ME] Step 1.5: Extracting Problem Data (Data Slicing from image_00)...")
             problem_text = await self.transcribe_image(image_data)
             data_anchor = await self._extract_key_data(problem_text, image_data=image_data)
                 data_anchor=data_anchor
             )
+            # Prepare images for Gemini Vision
+            vision_content = [check_prompt]
+            for img_bytes in image_data_list:
+                vision_content.append({"mime_type": "image/png", "data": img_bytes})
+            print(f"📝 [CHECK-ME] Sending {len(image_data_list)} images + check prompt to Vision LLM...")
             response = await asyncio.wait_for(
+                self.vision_model.generate_content_async(vision_content),
                 timeout=60.0
             )
             yield BuddyEvent(
                 question_id=question_id,
                 state=BuddyState.STRATEGY_READY,
+                payload={"sections": [strategy_card]} if strategy_card else {}
             )
             # V300.3: Smart Visual Triggers (Product Alignment)
                     llm_steps = llm_resp if isinstance(llm_resp, list) else llm_resp.get("steps", [])
                     # 2. השרת שולט: הפעלת ה-Polygraph על הצעדים של ה-LLM
+                    struct_ok, struct_reason = await MathPolygraph.validate_step_sequence(llm_steps, topic=sub_q.get('topic', 'GENERAL'))
                     poly_ok = struct_ok
                     poly_reason = struct_reason
                     if struct_ok:
                         # V1.3: Also verify algebraic consistency (e.g. A + B = C)
+                        alg_ok, alg_reason = await MathPolygraph.verify_algebraic_consistency(llm_steps, topic=sub_q.get('topic', 'GENERAL'))
                         if not alg_ok:
                             poly_ok = False
                             poly_reason = alg_reason
                                 is_degraded = True
                                 degraded_reason = "polygraph_fail_forbidden_words"
                                 # Continue to next attempt
                             else:
+                                # V317.8 Soft Fail: Treat SymPy Parse Error as a warning immediately to avoid retries on valid LaTeX
+                                print(f"🛡️ [SOFT FAIL] SymPy Parse Error detected (Attempt {attempts}). No forbidden words found. Trusting LLM output for sub-q {sub_q['id']}.")
                                 is_degraded = True
                                 degraded_reason = "sympy_soft_fail"
                                 break # Exit the attempt loop
                         }
                 # 4. Packaging & Yielding
+                # V8.6.7 FIX / V317.5: Only pass the final answer text forward to prevent massive JSON injection in future prompts
+                context[f"result_{sub_q['id']}"] = solved_data.get("final_answer", "No valid answer extracted") if isinstance(solved_data, dict) else "הושלם"
                 # AI Assessment Telemetry Extraction
                 if not assessment_sent and isinstance(solved_data, dict) and "assessment" in solved_data:
                     assessment_data = solved_data["assessment"]
                     if uid and assessment_data:
                         try:
                             from analytics import analytics_manager
+                            loop = asyncio.get_event_loop()
+                            loop.create_task(asyncio.to_thread(analytics_manager.update_weekly_analytics, uid, assessment_data))
                             print(f"📊 [ANALYTICS] Triggered background telemetry for {uid}")
                             assessment_sent = True
                         except Exception as e:
             # V5.10.0: Save to History if Premium
             tier = kwargs.get('tier', 'student_basic')
+            # Variable uid is already defined at start of smart_solve
             print(f"🔍 [DEBUG HISTORY] UID: {uid}, Received Tier: '{tier}', kwargs keys: {list(kwargs.keys())}")
             is_premium = tier in ["premium", "admin", "admin_unlimited"]
             if is_premium and uid:
                 try:
+                    # V315.0: Explicit scheduling with loop check
+                    loop = asyncio.get_event_loop()
+                    loop.create_task(self._save_exercise_history(uid, problem_text, all_solutions))
                     print(f"📚 [HISTORY] History saving scheduled for {uid}")
                 except Exception as e:
                     print(f"❌ [HISTORY] Failed to schedule history saving: {e}")
         """
         V277.0: Main solve method with BINARY DATA SUPPORT.
         """
+        uid = kwargs.get('uid')
         image_data_list = kwargs.get('image_data_list')
         image_data = kwargs.get('image_data') or kwargs.get('image_bytes')
         if image_data and not image_data_list:
             image_data_list = [image_data]
         elif image_data_list and not image_data:
             image_data = image_data_list[0]
+        # V316.0: CRITICAL - Write the normalized image_data / image_data_list back into kwargs so later code reading kwargs sees the same values
+        kwargs['image_data'] = image_data
+        kwargs['image_data_list'] = image_data_list
         question_id = kwargs.get('question_id', f"q_{int(time.time())}")
         start_time = asyncio.get_event_loop().time()
         # GLOBAL_TIMEOUT_SEC = 240 # 4 minutes usually
             print(f"📝 [V285.0] Mode=CHECK detected. Routing to _check_student_work()...")
             student_gender = kwargs.get('student_gender', 'M')
             async for event in self._check_student_work(
+                image_data_list=image_data_list,
                 grade=grade,
                 student_name=student_name,
                 student_gender=student_gender,
             return
         # ===================== END CHECK ME ROUTING =====================
+        if image_data_list:
+            print(f"🔵 [BIT-LOG] Starting OCR Pipeline on {len(image_data_list)} images...")
+            ocr_results = []
+            for i, img in enumerate(image_data_list):
+                print(f"📸 [BIT-LOG] Transcribing image {i}...")
+                text = await self.transcribe_image(img)
+                if text:
+                    ocr_results.append(text)
+            problem_text = "\n\n".join(ocr_results)
+            image_data = image_data_list[0] # Use first image for main processing logic/anchors
         logger.info(f"🔎 [TRACE] RAW OCR TEXT: {problem_text}")
                 fast_result, _, _ = validate_and_fix_solution(fast_result)
                 # Quick Polygraph check
                 _poly_steps = collect_all_steps(fast_result)
+                _poly_ok, _ = MathPolygraph.validate_step_sequence(_poly_steps, topic=str(strategy.value))
                 if _poly_ok:
                     yield BuddyEvent(
         # ===================== FULL STREAMING PIPELINE =====================
         print(f"🎯 [BIT-LOG] Using Streaming Pipeline Strategy: {strategy.value}")
+        # V316.0: image_data is already hydrated at the top of solve_problem.
+        # This block is now redundant but kept for safety if someone moves things.
+        if image_data is None and image_data_list:
+            image_data = image_data_list[0]
+            print("📸 [BIT-LOG] Using first image from list for Data Anchor phase. (Redundant Check)")
         data_anchor = await self._extract_key_data(problem_text, image_data=image_data) or {}
+        # V8.9.2: SEPARATE VALIDATOR PASS (Single Source of Truth)
+        if image_data and data_anchor:
+            print("🛡️ [V8.9.2] Starting Data Anchor Validation Pass...")
+            data_anchor = await self._validate_anchor(data_anchor, image_data)
         # Iterate through the streaming smart_solve
         # V5.10.2: Remove keys already passed explicitly to avoid TypeError collision
         for _key in ['student_gender', 'image_data', 'image_data_list', 'image_bytes',
         s = re.sub(r'\s+', ' ', s)
         return s.strip()
+    async def _validate_anchor(self, data_anchor: dict, image_data: bytes) -> dict:
+        """V8.9.2: Single Source of Truth Validator pass."""
+        try:
+            from prompts import get_anchor_validation_prompt
+            from utils.safe_json import safe_extract_json
+            prompt = get_anchor_validation_prompt(data_anchor)
+            # Using current model which supports Vision
+            response = await self.model.generate_content_async(
+                [
+                    {"mime_type": "image/jpeg", "data": image_data},
+                    prompt
+                ]
+            )
+            print(f"🛡️ [V8.9.3] Raw Validator Response: {response.text[:200]}...")
+            clean_anchor = safe_extract_json(response.text, "anchor_validator")
+            if clean_anchor:
+                print(f"🛡️ ✅ [V8.9.2] Anchor Validated: {len(clean_anchor.get('function_equations', []))} equations found.")
+                return clean_anchor
+            return data_anchor
+        except Exception as e:
+            print(f"⚠️ [V8.9.2] Anchor Validation failed: {e}. Falling back to raw OCR.")
+            return data_anchor
     async def _save_exercise_history(self, uid: str, question: str, solutions: list):
+        """V317.0: Saves sanitized exercise history with clean titles."""
         try:
             db = firebase_manager.get_db()
+            if not db or not uid: return
def generate_clean_title(ocr_raw_text):
    """Build a short, human-readable title from raw OCR text.

    If the input is a string that looks like a JSON object, its ``text``
    field is used; otherwise the input is stringified. Math blocks, LaTeX
    commands, brackets and leftover operator characters are removed, and
    the first six words are kept (with ``...`` appended when truncated).

    Args:
        ocr_raw_text: Raw OCR output, either plain text or a JSON string.

    Returns:
        The cleaned title, or the generic Hebrew fallback title when nothing
        usable remains or any step fails.
    """
    default_title = "תרגיל במתמטיקה"
    try:
        # Unwrap a JSON payload if present
        if isinstance(ocr_raw_text, str) and ocr_raw_text.strip().startswith('{'):
            data = json.loads(ocr_raw_text)
            text = data.get('text', '')
        else:
            text = str(ocr_raw_text)

        # 1a. Remove display-math blocks $$...$$ first. The inline pattern
        #     below would otherwise match the empty "$$" delimiters and
        #     leave the formula body in the title.
        text = re.sub(r'\$\$.*?\$\$', '', text, flags=re.DOTALL)
        # 1b. Remove inline math blocks $...$
        text = re.sub(r'\$.*?\$', '', text)
        # 2. Remove LaTeX command names (e.g. \frac); arguments are handled below
        text = re.sub(r'\\[a-zA-Z]+', '', text)
        # 3. Remove curly and square brackets
        text = re.sub(r'[\{\}\[\]]', '', text)
        # 4. Remove residual math symbols
        text = re.sub(r'[\^_*=+\-/|]', '', text)

        # Keep only the first six words
        words = text.split()
        if not words:
            return default_title
        title = " ".join(words[:6])
        if len(words) > 6:
            title += "..."
        return title.replace('\n', ' ').strip()
    except Exception:
        # Title generation is cosmetic; never let it break history saving.
        return default_title
             # Flatten solution steps into a single string
             solution_text_parts = []
                         title = section.get("section_title", "")
                         solution_text_parts.append(f"### {title}")
                         for step in section.get("steps", []):
+                            exp = step.get("explanation_text", "") or ""
+                            math = step.get("math_artifact", {}).get("latex", "") or ""
+                            if not math: math = step.get("block_math", "") or ""
+                            solution_text_parts.append(str(exp))
                             if math:
+                                math = self._deep_sanitize_math(str(math))
                                 solution_text_parts.append(f"$${math}$$")
                 solution_text_parts.append("---")
             from firebase_admin import firestore
             import datetime
+            # Save to history collection with clean title
             history_ref = db.collection('users').document(uid).collection('history').document()
             history_ref.set({
                 "original_question_text": question,
+                "display_title": generate_clean_title(question),
                 "solution_steps_text": full_solution,
                 "timestamp": firestore.SERVER_TIMESTAMP
             })

prompts.py CHANGED Viewed

@@ -230,15 +230,19 @@ def _detect_relevant_rules(text: str, category: str = "") -> list:
     return rules
 def get_data_extraction_prompt(problem_text: str) -> str:
-    """V231.14: Extract strictly from IMAGE. OCR is a secondary reference."""
-    return fr"""
-### [DATA ANCHOR SUPREMACY]
 You are provided with an IMAGE and its rough OCR transcription.
-The OCR text is highly unreliable and may contain critical errors in mathematical symbols.
-YOUR TASK: Extract the mathematical "Absolute Truth" strictly by visually inspecting the IMAGE.
-Use the OCR text only as a secondary reference for context.
-Problem Text (OCR):
 {problem_text}
 Extract:
@@ -248,6 +252,17 @@ Extract:
    - Lines: y = mx + b, ax + by = c
    - ANY equation with '=' sign!
 2. **points**: Named points like A, B, M(3,5), P(x,y)
 3. **specific_values**: Numbers like r=5, a=3, m=2
@@ -274,6 +289,15 @@ JSON format (STRUCTURE ONLY - DO NOT USE THESE EXACT VALUES):
   "point_b": "(null, null)"
 }}
 CRITICAL INSTRUCTIONS:
 1. Include ALL equations with '=' sign in function_equations!
 2. **DATA INTEGRITY (V4.3.0):** Use the data below verbatim.
@@ -293,7 +317,7 @@ def get_specialist_prompt(category, problem_text, solver_hint, grade, student_na
     """בניית הפרומפט המלכותי — המורה למתמטיקה V231.6 (Data Anchor)"""
     features = _get_grade_features(grade, category)
     relevant_rules = _detect_relevant_rules(problem_text, category)
-    rules_str = "\n".join([f"    - {r}" for r in relevant_rules]) if relevant_rules else "    (לא זוהו כללים ספציפיים — בחר רק כללים רלוונטיים לקטגוריה {category})"
     # Anchor Block — DATA INTEGRITY RULE
     anchor_block = ""
@@ -314,7 +338,6 @@ def get_specialist_prompt(category, problem_text, solver_hint, grade, student_na
     CONSTRAINT: If the data says A(0,5), use A(0,5). If it contains f(x), solve that f(x).
     """
     # V231.5: Gender-aware phrases
     if student_gender == "F":
         g = {
@@ -387,11 +410,34 @@ def get_specialist_prompt(category, problem_text, solver_hint, grade, student_na
       "concave_down": ["(b, c)"]
     }
     """
     {proof_block}
     {investigation_block}
     """
-    """
 # prompts.py - V275.1 (Safe OCR - Technique over Examples)
@@ -550,14 +596,15 @@ def get_master_prompt_v860(category: str = "", problem_text: str = ""):
     ═══════════════════════════════════════════
     ABSOLUTE RULES (violations cause immediate rejection):
     ═══════════════════════════════════════════
-    1. **DATA ANCHOR SUPREMACY:** The equations in the JSON Data Anchor are the ABSOLUTE TRUTH. Ignore any conflicting OCR text.
     2. **ZERO MAGIC MATH (BABY STEPS):**
        - NEVER skip algebraic steps. Show moving sides, dividing, and expanding brackets.
        - ALWAYS explain the mathematical rule/theorem *BEFORE* applying it.
          * Bad: "נגזור ונשווה לאפס: f'(x) = 2x"
          * Good: "כדי למצוא נקודת קיצון נגזור את הפונקציה. מכיוון שזו מנה, נשתמש בכלל המנה האומר ש... נגזור את המונה בנפרד ואת המכנה בנפרד:"
     3. **COMPLETE THE MISSION (ANTI-TRUNCATION):** Never abandon the task. You MUST reach the final numeric/algebraic answer. If asked for extrema, you must find x, y, and classify (max/min).
-    4. **UI CONTENT STRATEGY (CRITICAL - MATH SEPARATION & MULTI-STEP LOGIC):**
        - `content_mixed`: ONLY for your warm Hebrew explanation and short inline variables (e.g. $x=5$). Do NOT put long equations, derivatives, or multi-line steps here!
        - `block_math`: This is where the ACTUAL CALCULATION goes. It must contain the main equation or algebraic step in PURE LaTeX.
        - IF there is any mathematical derivation or calculation in a step, it MUST go into `block_math` and NOT into `content_mixed`. NEVER put an equation on a new line inside `content_mixed`.
@@ -574,11 +621,16 @@ def get_master_prompt_v860(category: str = "", problem_text: str = ""):
        - State factually in `content_mixed`: "אני מזהה סתירה בנתונים, בואו נבדוק שוב את הפונקציה המקורית. על פי החישוב שלי [הסבר קצר...]."
        - Focus on the TRUTH of your calculation. Never force a derivation to match a (likely misread) OCR error.
        - **Mathematical Logic (V8.6.6):** If OCR is ambiguous (e.g. $e^x$ vs $e^{-x}$), use the overall context of the question (e.g., domain, asymptotes, or known behavior) to determine the logically correct formula.
-    8. **ANTI-NEWLINE RULE (V8.6.8):**
        - In the `final_answer` field, NEVER use `\\` or `\newline` for line breaks.
        - **BIDI SAFETY:** PROHIBIT Hebrew text (like "או", "וגם") inside `final_answer` or `block_math`.
        - If there are multiple answers, separate them ONLY with English commas (e.g., "x=1, x=2").
-    9. **VARIABLE CONSISTENCY (V8.6.8):**
        - Always use standard mathematical variables ($x, y, z, m, n$) as provided in context. Never invent new variable names not found in the Data Anchor or original problem.
     10. **STRATEGY CARD NO-SPOILER RULE (CRITICAL):**
         - The `strategy_card` MUST NOT contain any numbers, final equations, derivatives, or solutions.
@@ -611,12 +663,13 @@ def get_master_prompt_v860(category: str = "", problem_text: str = ""):
         - NEVER use data, parameters (like $a=1$), or specific values that explicitly belong to a LATER sub-question (e.g., Section ב') to solve an EARLIER sub-question (e.g., Section א').
         - Solve earlier sections algebraically using general variables unless the data is part of the global question anchor.
         - If a student's finding in Section א' is required for Section ב', you may use it, but NEVER the other way around.
-    17. **NO GUESSING RULE (V310.0):** אם הנתונים בתמונה אינם מספיקים כדי לקבוע בוודאות איזה גרף מתאים לאזו פונקציה, אל תנחש! הצג את הניתוח המתמטי והסבר מה חסר כדי להגיע להכרעה. ניתן להוסיף: "מומלץ לבחון את הגרף המצורף (אם קיים) כדי לראות את המאפיינים שחישבנו."
-    ═══════════════════════════════════════════
     REQUIRED JSON STRUCTURE (EXACT KEYS):
-    ═══════════════════════════════════════════
     {{
       {graph_field}
       "strategy_card": {{
@@ -702,6 +755,10 @@ def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"
     🎓 תפקיד: אתה בודקת שיעורי בית — מורה פרטית חמה שבודקת את העבודה של תלמיד.
     🚫 אתה לא פותר את התרגיל מחדש! אתה מנתח את מה שהתלמיד כתב.
     👤 התלמיד: {student_name}, כיתה {grade}.
     👑 מגדר: {"נקבה" if student_gender == "F" else "זכר"}. השתמש/י בלשון מתאימה.
@@ -827,6 +884,41 @@ def get_teacher_summary_prompt(student_name: str, student_gender: str = "M"):
         "formulas_to_remember": ["LaTeX נוסחה 1", "LaTeX נוסחה 2"],
         "tts_speech": "טקסט דיבור עברי נקי ל-TTS"
     }}
-    CRITICAL: Output ONLY the JSON block. No text before, no text after.
     """

     return rules
 def get_data_extraction_prompt(problem_text: str) -> str:
+    """V231.15: Template-based extraction to avoid f-string escape hell."""
+    template = r"""
+### [SACRED TRUTH: IMAGE OVER OCR]
 You are provided with an IMAGE and its rough OCR transcription.
+⚠️ CRITICAL WARNING: The OCR text is a WEAK HINT and is often WRONG, truncated, or mangled.
+💎 SACRED TRUTH: The IMAGE is the absolute source of truth.
+YOUR TASK:
+1. PERCEPTUAL PRIORITY: Visually inspect the IMAGE meticulously. Every pixel of the formula counts.
+2. OCR SKEPTICISM: If the OCR text says \frac{{1}}{{(\ln x)^2}} but the IMAGE shows \frac{{1}}{{\ln^2 x}}, you MUST ignore the OCR and extract \frac{{1}}{{\ln^2 x}}.
+3. NO INVENTIONS: Do not "fix" the math if it looks weird. Extract strictly what is drawn.
+Problem Text (OCR Hint):
 {problem_text}
 Extract:
    - Lines: y = mx + b, ax + by = c
    - ANY equation with '=' sign!
+**Mathematical Scanning Directionality (V8.9.3):**
+- **LTR PRIORITY:** Mathematical equations are STRICTLY Left-To-Right.
+- When extracting formulas embedded in Hebrew text, IGNORE the RTL flow.
+- Read the characters in their literal horizontal visual order from LEFT to RIGHT.
+- Ensure multipliers, exponents, and signs are placed exactly where they appear visually.
+**Visual Context & Continuity (V8.9.5):**
+- **SPLIT EQUATIONS:** If an equation starts on one line and ends on another, merge them into a single coherent formula.
+- **SEGMENTED CONSTRAINTS:** Look for constraints (like x > 0) near equations; they are often visually separated by space or Hebrew words but belong to the formula.
+- **MULTI-PART ANCHORING:** Maintain consistency between sub-questions (א, ב, ג). If a variable 'm' is defined in the preamble, it applies to all sub-questions.
 2. **points**: Named points like A, B, M(3,5), P(x,y)
 3. **specific_values**: Numbers like r=5, a=3, m=2
   "point_b": "(null, null)"
 }}
+**CRITICAL JSON STRUCTURE RULE (V8.9.4):**
+- You MUST output a SINGLE, flat JSON object.
+- EVERY mathematical expression, function, or parameter MUST have a UNIQUE, highly descriptive key.
+- DO NOT use a generic key like "equation" multiple times, as this will overwrite the data.
+- DO NOT output an array/list of objects.
+- **Correct Example:** { "main_function_f": "f(x)=...", "function_h": "h(x)=...", "given_extremum_x": "x=1/e" }
+- **Wrong Example:** { "equation": "f(x)=...", "equation": "h(x)=..." }
 CRITICAL INSTRUCTIONS:
 1. Include ALL equations with '=' sign in function_equations!
 2. **DATA INTEGRITY (V4.3.0):** Use the data below verbatim.
     """בניית הפרומפט המלכותי — המורה למתמטיקה V231.6 (Data Anchor)"""
     features = _get_grade_features(grade, category)
     relevant_rules = _detect_relevant_rules(problem_text, category)
+    rules_str = "\n".join([f"    - {r}" for r in relevant_rules]) if relevant_rules else f"    (לא זוהו כללים ספציפיים — בחר רק כללים רלוונטיים לקטגוריה {category})"
     # Anchor Block — DATA INTEGRITY RULE
     anchor_block = ""
     CONSTRAINT: If the data says A(0,5), use A(0,5). If it contains f(x), solve that f(x).
     """
     # V231.5: Gender-aware phrases
     if student_gender == "F":
         g = {
       "concave_down": ["(b, c)"]
     }
     """
+    prompt = f"""
+    DEPTH: {features['depth']}
+    STYLE: {features['style']} (בנימה של '{g['royal']}').
+    TONE: {features['tone']}
+    {anchor_block}
+    🎯 המשימה: פתור את התרגיל בצורה פדגוגית, מעצימה ובשלבים ברורים.
+    📜 כללי הפדגוגיה של BuddyMath (חובה!):
+{rules_str}
     {proof_block}
     {investigation_block}
+    ═══════════════════════════════════════════════════
+    הנחיות לפתרון (Solver Hint):
+    {solver_hint}
+    ═══════════════════════════════════════════════════
+    השאלה לפתרון:
+    {problem_text}
+    דגימת סגנון פנייה:
+    - פתיחה: "{g['example_open']}"
+    - סיום: "{g['example_close']}"
     """
+    return prompt
 # prompts.py - V275.1 (Safe OCR - Technique over Examples)
     ═══════════════════════════════════════════
     ABSOLUTE RULES (violations cause immediate rejection):
     ═══════════════════════════════════════════
+    1. **DATA ANCHOR SUPREMACY (V8.9.2):** The equations in the JSON Data Anchor are the ABSOLUTE TRUTH. You MUST solve the exact math provided in the Anchor. Do NOT attempt to re-read the image for the main function; trust the pre-validated Data Anchor.
     2. **ZERO MAGIC MATH (BABY STEPS):**
        - NEVER skip algebraic steps. Show moving sides, dividing, and expanding brackets.
        - ALWAYS explain the mathematical rule/theorem *BEFORE* applying it.
          * Bad: "נגזור ונשווה לאפס: f'(x) = 2x"
          * Good: "כדי למצוא נקודת קיצון נגזור את הפונקציה. מכיוון שזו מנה, נשתמש בכלל המנה האומר ש... נגזור את המונה בנפרד ואת המכנה בנפרד:"
     3. **COMPLETE THE MISSION (ANTI-TRUNCATION):** Never abandon the task. You MUST reach the final numeric/algebraic answer. If asked for extrema, you must find x, y, and classify (max/min).
+    4. **ANTI-ASCII-ART RULE (V8.8.8):** NEVER attempt to draw or sketch a graph using text characters, keyboard symbols, slashes, or ASCII art (e.g., do not use |, /, \\, -, _ to make a picture). If asked to sketch a graph, ONLY describe its mathematical properties in text (e.g., intersections, asymptotes).
+    5. **UI CONTENT STRATEGY (CRITICAL - MATH SEPARATION & MULTI-STEP LOGIC):**
        - `content_mixed`: ONLY for your warm Hebrew explanation and short inline variables (e.g. $x=5$). Do NOT put long equations, derivatives, or multi-line steps here!
        - `block_math`: This is where the ACTUAL CALCULATION goes. It must contain the main equation or algebraic step in PURE LaTeX.
        - IF there is any mathematical derivation or calculation in a step, it MUST go into `block_math` and NOT into `content_mixed`. NEVER put an equation on a new line inside `content_mixed`.
        - State factually in `content_mixed`: "אני מזהה סתירה בנתונים, בואו נבדוק שוב את הפונקציה המקורית. על פי החישוב שלי [הסבר קצר...]."
        - Focus on the TRUTH of your calculation. Never force a derivation to match a (likely misread) OCR error.
        - **Mathematical Logic (V8.6.6):** If OCR is ambiguous (e.g. $e^x$ vs $e^{-x}$), use the overall context of the question (e.g., domain, asymptotes, or known behavior) to determine the logically correct formula.
+    8. **LATEX SYNTAX RULE (V8.6.9 — CRITICAL):**
+       - You MUST strictly use LaTeX macros for all mathematical functions.
+       - You MUST write \ln (with a backslash) and NEVER just ln.
+       - You MUST write \sin, \cos, \tan, \log.
+       - Failure to use the backslash will crash the KaTeX renderer and Fail validation.
+    9. **ANTI-NEWLINE RULE (V8.6.8):**
        - In the `final_answer` field, NEVER use `\\` or `\newline` for line breaks.
        - **BIDI SAFETY:** PROHIBIT Hebrew text (like "או", "וגם") inside `final_answer` or `block_math`.
        - If there are multiple answers, separate them ONLY with English commas (e.g., "x=1, x=2").
+    10. **VARIABLE CONSISTENCY (V8.6.8):**
        - Always use standard mathematical variables ($x, y, z, m, n$) as provided in context. Never invent new variable names not found in the Data Anchor or original problem.
     10. **STRATEGY CARD NO-SPOILER RULE (CRITICAL):**
         - The `strategy_card` MUST NOT contain any numbers, final equations, derivatives, or solutions.
         - NEVER use data, parameters (like $a=1$), or specific values that explicitly belong to a LATER sub-question (e.g., Section ב') to solve an EARLIER sub-question (e.g., Section א').
         - Solve earlier sections algebraically using general variables unless the data is part of the global question anchor.
         - If a student's finding in Section א' is required for Section ב', you may use it, but NEVER the other way around.
+    17. **VISUAL GRAPH ANALYSIS (V8.9.1):** When asked to identify a graph from an image containing multiple options (e.g., I, II, III, IV), you MUST explicitly describe what you see in the image for EACH option before making a choice. Base your final selection strictly on matching your mathematical deductions (domain, asymptotes, roots) to the visual features of the graphs in the image. Ensure these descriptions are in `content_mixed` to maintain logical transparency for the student.
+    18. **NO GUESSING RULE (V310.0):** אם הנתונים בתמונה אינם מספיקים כדי לקבוע בוודאות איזה גרף מתאים לאזו פונקציה, אל תנחש! הצג את הניתוח המתמטי והסבר מה חסר כדי להגיע להכרעה. ניתן להוסיף: "מומלץ לבחון את הגרף המצורף (אם קיים) כדי לראות את המאפיינים שחישבנו."
+    ═══════════════════════════════════════════════════
     REQUIRED JSON STRUCTURE (EXACT KEYS):
+    ═══════════════════════════════════════════════════
     {{
       {graph_field}
       "strategy_card": {{
     🎓 תפקיד: אתה בודקת שיעורי בית — מורה פרטית חמה שבודקת את העבודה של תלמיד.
     🚫 אתה לא פותר את התרגיל מחדש! אתה מנתח את מה שהתלמיד כתב.
+    📸 היררכיית תמונות:
+    1. התמונה הראשונה (image_00) היא השאלה המקורית מהספר/מבחן.
+    2. כל שאר התמונות (image_01 ומעלה) מכילות את שלבי הפתרון שכתב התלמיד בכתב יד.
     👤 התלמיד: {student_name}, כיתה {grade}.
     👑 מגדר: {"נקבה" if student_gender == "F" else "זכר"}. השתמש/י בלשון מתאימה.
         "formulas_to_remember": ["LaTeX נוסחה 1", "LaTeX נוסחה 2"],
         "tts_speech": "טקסט דיבור עברי נקי ל-TTS"
     }}
+    """
def get_anchor_validation_prompt(ocr_json: dict) -> str:
    """
    V8.9.2: Dedicated prompt for the Orchestrator's Data Anchor Validator.

    Serializes the raw OCR anchor as pretty-printed JSON (non-ASCII kept
    as-is) and embeds it in the validator instructions. The image itself is
    not handled here; the caller attaches it to the model request.

    Args:
        ocr_json: The raw extracted anchor; must be JSON-serializable.

    Returns:
        The full validator prompt text.
    """
    ocr_str = json.dumps(ocr_json, indent=2, ensure_ascii=False)
    # NOTE: this is an f-string. Literal LaTeX braces must be doubled and
    # backslashes escaped, otherwise "{e}" is evaluated as a Python
    # expression (NameError) and "\s" is an invalid escape sequence.
    return f"""
    You are a strict Mathematical Transcriber & Validator.
    Look at the provided OCR JSON and the original image.
    OCR JSON (Raw Extraction):
    {ocr_str}
    ⚠️ MISSION:
    The OCR often corrupts complex fractions, missing brackets, or mangling operators.
    Your ONLY job is to output a verified, syntactically perfect JSON containing the main mathematical functions and parameters exactly as they appear in the image.
    **CRITICAL VISION PROTOCOL:**
    You are a strict Visual Validator, NOT a text auto-completer. When evaluating the extracted math, you MUST cross-reference the text directly against the ORIGINAL IMAGE.
    If the input string is syntactically broken, truncated, or missing components (e.g., dropped fractions, missing multipliers outside parentheses), DO NOT delete terms to artificially 'fix' the equation.
    You MUST visually reconstruct the EXACT mathematical expression pixel-for-pixel as it appears in the image. Do not hallucinate values (e.g., changing $e$ to $\\sqrt{{e}}$). MATCH THE PIXELS EXACTLY.
    RULES:
    1. Fix any hanging operators (e.g., 'a+', 'x-').
    2. Restore missing multipliers or fractions (e.g., if image shows '1/x' but OCR missed it).
    3. Ensure all brackets are closed and standard variables are used.
    4. If the OCR is correct, keep it as is.
    5. CRITICAL: Do NOT solve the problem. Do NOT explain.
    6. Output ONLY the corrected JSON following the exact structure of the input.
    7. V8.9.2: If you fix a critical syntax error (like 'a+' -> 'a+1/x^2'), ensure the final result is mathematically plausible based on the visual evidence.
    8. **HORIZONTAL VERIFICATION (V8.9.3):** Check the horizontal order of elements. If an element is to the left of a bracket in the image, it MUST be to the left of the bracket in your JSON. Do NOT let the Hebrew text flow (RTL) swap the positions of multipliers or terms.
    9. **CRITICAL JSON STRUCTURE RULE (V8.9.4):** Every output MUST be a flat JSON dictionary with UNIQUE keys. Do NOT use duplicate keys like "equation" multiple times. If there are multiple equations, use "equation_1", "equation_2", etc.
    Return ONLY the corrected JSON.
    """

strategy_manager.py CHANGED Viewed

@@ -68,7 +68,7 @@ System Prompt Override: הוסף לתחילת ההסבר שלך את ההערה
 """
                 prompt = soft_recovery_note + "\n" + prompt
-            prompt += f"\n\n🎯 MISSION: Solve ONLY part: {problem_text}"
             llm_response = await self._call_llm(prompt, image_data, category, image_pages, proof_graph_steps_count)
             # V5.8.2: Guard against raw list responses
@@ -127,7 +127,7 @@ System Prompt Override: הוסף לתחילת ההסבר שלך את ההערה
     async def _call_llm(self, prompt, image_data, category, image_pages, proof_graph_steps_count=1):
         # V8.5: No More Patchwork. Use centralized V4.3.0 standard.
         v430_instruction = prompts.get_master_prompt_v430(category=category, problem_text=prompt)
-        prompt += v430_instruction
         from google.generativeai.types import GenerationConfig
         import asyncio

 """
                 prompt = soft_recovery_note + "\n" + prompt
+            prompt = (prompt or "") + f"\n\n🎯 MISSION: Solve ONLY part: {problem_text}"
             llm_response = await self._call_llm(prompt, image_data, category, image_pages, proof_graph_steps_count)
             # V5.8.2: Guard against raw list responses
     async def _call_llm(self, prompt, image_data, category, image_pages, proof_graph_steps_count=1):
         # V8.5: No More Patchwork. Use centralized V4.3.0 standard.
         v430_instruction = prompts.get_master_prompt_v430(category=category, problem_text=prompt)
+        prompt = (prompt or "") + (v430_instruction or "")
         from google.generativeai.types import GenerationConfig
         import asyncio

visuals.py CHANGED Viewed

@@ -7,8 +7,15 @@ from sympy import sympify, symbols, lambdify, Abs, sin, cos, tan, sqrt, log, ln,
 print("✅ 🟢 [BIT-LOG: Visuals V277.0] - Finer Lines + Grid Optimization")
 def sanitize_math_for_sympy(expr_str: str) -> str:
     if not expr_str: return ""
     # 1. Strip Hebrew
     expr_str = re.sub(r'[\u0590-\u05FF]', '', expr_str)
@@ -97,25 +104,31 @@ def _run_with_timeout(func, args, timeout_duration=2.0, default_value=None):
             return default_value
 def generate_plot(latex_input: str, context_text: str = "", geometric_entities: dict = None) -> str:
-    print(f"📈 [VISUALS] generate_plot called with: latex='{latex_input}', geo_entities={list(geometric_entities.keys()) if geometric_entities else None}")
-    # We still sanitize the full input, but we don't split it by comma unconditionally
-    # geometry check uses the first element to avoid implicit matches on secondary graphs
-    first_expr = latex_input.split(',')[0].strip()
-    safe_expr_first = sanitize_math_for_sympy(first_expr)
-    # We sanitize the FULL input to pass to _plot_func
-    safe_expr_full = sanitize_math_for_sympy(latex_input)
-    # בדיקה חכמה: האם באמת יש ישויות גיאומטריות מלאות?
-    has_real_geo = False
-    if geometric_entities:
-        has_real_geo = any(len(v) > 0 for v in geometric_entities.values() if isinstance(v, list))
-    is_implicit = "=" in safe_expr_first or ("x" in safe_expr_first and "y" in safe_expr_first and "**2" in safe_expr_first) or has_real_geo
-    if is_implicit:
-        return _plot_geo(safe_expr_first, context_text, geometric_entities)
-    return _plot_func(safe_expr_full)
 def _plot_func(expr_str):
     try:
@@ -145,6 +158,10 @@ def _plot_func(expr_str):
         for idx, single_expr in enumerate(expressions):
             try:
                 # V8.6.9: Explicit try-except for SYMPY_PARSE_ERROR prevention
                 try:
                     expr = _run_with_timeout(sympify, (single_expr, None, local_dict), timeout_duration=2.0)
@@ -154,10 +171,16 @@ def _plot_func(expr_str):
                 if not expr: continue
-                # טיפול בפרמטרים חופשיים
-                free_syms = expr.free_symbols - {x}
-                if free_syms:
-                    expr = expr.subs({s: 1 for s in free_syms})
                 f_np = lambdify(x, expr, modules=['numpy'])
                 y_vals = f_np(x_vals)

 print("✅ 🟢 [BIT-LOG: Visuals V277.0] - Finer Lines + Grid Optimization")
+def clean_latex_for_sympy(latex_str):
+    r"""V318.0: Converts |x| to Abs(x) and \ln to log for SymPy compatibility.
+
+    Args:
+        latex_str: Expression text; must be a string (it is passed to
+            ``re.sub`` and ``str.replace`` without a None/empty guard).
+
+    Returns:
+        The input with every ``|...|`` pair rewritten as ``Abs(...)`` and
+        every literal ``\ln`` replaced by ``log``.
+    """
+    # Non-greedy match: each bar is paired with the very next bar, so nested
+    # absolute values such as ||x|-1| are not rewritten correctly.
+    cleaned = re.sub(r'\|(.*?)\|', r'Abs(\1)', latex_str)
+    # Only the backslash form is replaced; a bare "ln" is left untouched here.
+    cleaned = cleaned.replace(r'\ln', 'log')
+    return cleaned
 def sanitize_math_for_sympy(expr_str: str) -> str:
     if not expr_str: return ""
+    expr_str = clean_latex_for_sympy(expr_str)
     # 1. Strip Hebrew
     expr_str = re.sub(r'[\u0590-\u05FF]', '', expr_str)
             return default_value
 def generate_plot(latex_input: str, context_text: str = "", geometric_entities: dict = None) -> str:
+    """Dispatch a plot request to the geometry or the function plotter.
+
+    The first comma-separated expression of ``latex_input`` decides the
+    route: if, after sanitizing, it contains ``=``, or contains ``x``, ``y``
+    and ``**2`` together, or if ``geometric_entities`` holds at least one
+    non-empty list, the request goes to ``_plot_geo`` with that first
+    expression only. Otherwise the fully sanitized input goes to
+    ``_plot_func``.
+
+    Args:
+        latex_input: Expression text, possibly several comma-separated parts.
+        context_text: Forwarded unchanged to ``_plot_geo``.
+        geometric_entities: Optional mapping; only its list values are
+            inspected here, the whole mapping is forwarded to ``_plot_geo``.
+
+    Returns:
+        Whatever the selected plotter returns (annotated as ``str``), or
+        ``None`` when any exception is raised along the way. NOTE(review):
+        the ``-> str`` annotation does not reflect the ``None`` error path.
+    """
+    try:
+        print(f"📈 [VISUALS] generate_plot called with: latex='{latex_input}', geo_entities={list(geometric_entities.keys()) if geometric_entities else None}")
+        # We still sanitize the full input, but we don't split it by comma unconditionally
+        # geometry check uses the first element to avoid implicit matches on secondary graphs
+        first_expr = latex_input.split(',')[0].strip()
+        safe_expr_first = sanitize_math_for_sympy(first_expr)
+        # We sanitize the FULL input to pass to _plot_func
+        safe_expr_full = sanitize_math_for_sympy(latex_input)
+        # Smart check: are there actually any non-empty geometric entity lists?
+        has_real_geo = False
+        if geometric_entities:
+            has_real_geo = any(len(v) > 0 for v in geometric_entities.values() if isinstance(v, list))
+        is_implicit = "=" in safe_expr_first or ("x" in safe_expr_first and "y" in safe_expr_first and "**2" in safe_expr_first) or has_real_geo
+        if is_implicit:
+            return _plot_geo(safe_expr_first, context_text, geometric_entities)
+        return _plot_func(safe_expr_full)
+    except Exception as e:
+        # Top-level boundary: log the failure with a traceback and signal it
+        # to the caller with None instead of propagating the exception.
+        print(f"📈 🔴 [BIT-LOG] CRITICAL PLOT ERROR: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
 def _plot_func(expr_str):
     try:
         for idx, single_expr in enumerate(expressions):
             try:
+                # V318.0: Robust sanitization within the loop
+                single_expr = sanitize_math_for_sympy(single_expr)
+                if not single_expr: continue
                 # V8.6.9: Explicit try-except for SYMPY_PARSE_ERROR prevention
                 try:
                     expr = _run_with_timeout(sympify, (single_expr, None, local_dict), timeout_duration=2.0)
                 if not expr: continue
+                # Ticket 1 Fix: Handle free symbols (parameters like a, b, k)
+                # We only want to plot with respect to 'x', so replace everything else with 1
+                expr_symbols = expr.free_symbols
+                params = [s for s in expr_symbols if s != x]
+                if params:
+                    print(f"🛠️ [VISUALS] Substituting parameters {params} with 1 for sketch.")
+                    expr = expr.subs({s: 1 for s in params})
+                # Final check if the result is still symbolic
+                if not expr.is_finite: continue
                 f_np = lambdify(x, expr, modules=['numpy'])
                 y_vals = f_np(x_vals)