Spaces:

dotandru
/

BuddyMath

Sleeping

App Files Files Community

dotandru committed on Mar 14

Commit

5230898

1 Parent(s): 5e09541

V280.0: Robust Math Validation Fixes & Model Lockdown (RCE Protection, Multiline Regex, Soft Fail)

Browse files

Files changed (9) hide show

config.py +5 -0
deploy_hf +1 -1
domain/math_validator.py +152 -40
find_models.py +0 -8
orchestrator.py +41 -17
prompts.py +16 -3
test_veo.py +0 -45
tests/test_validation_robustness.py +84 -0
video_generator.py +0 -87

config.py CHANGED Viewed

@@ -54,3 +54,8 @@ CONFIDENCE_THRESHOLD_MEDIUM = 0.55 if IS_PRODUCTION else 0.01
 print(f"[CONFIG] Loading {ENV.upper()} configuration.")
 print(f"[CONFIG] Project: {PROJECT_ID}")
 print(f"[CONFIG] Bucket: {STORAGE_BUCKET}")

 print(f"[CONFIG] Loading {ENV.upper()} configuration.")
 print(f"[CONFIG] Project: {PROJECT_ID}")
 print(f"[CONFIG] Bucket: {STORAGE_BUCKET}")
+# V3.1.3: Model Hardening - the model defaults to gemini-2.0-flash.
+# To use a different model, set the environment variable 'GEMINI_MODEL'.
+GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.0-flash")
+print(f"[CONFIG] Active Model: {GEMINI_MODEL}")

deploy_hf CHANGED Viewed

	@@ -1 +1 @@
1	- Subproject commit ~~3fd45984f20ab7410249dddd5999834076d2a512~~


1	+ Subproject commit 080bbe367f70411fa816f96e2a9ea47a63b728b2

domain/math_validator.py CHANGED Viewed

@@ -4,8 +4,9 @@ import logging
 import multiprocessing
 import time
 import asyncio
-from typing import Tuple, List
 import sympy
 logger = logging.getLogger(__name__)
@@ -69,15 +70,9 @@ def _latex_to_sympy_str(latex_str: str) -> str:
     # 7. Implicit multiplication: 2x → 2*x (only if not inside a word)
     s = re.sub(r'(\d)([a-zA-Z(])', r'\1*\2', s)
-    # 8. Handle equals sign (for equations)
-    if '=' in s:
-        parts = s.split('=', 1)
-        # V310.0: If one side is empty, ignore the '='
-        if parts[0].strip() and parts[1].strip():
-            s = f"({parts[0]}) - ({parts[1]})"
-        else:
-            s = parts[0] if parts[0].strip() else parts[1]
     # 9. Final cleanup: Remove illegal SymPy chars like ', ", ?, !
     s = re.sub(r'[?!\'"]', '', s)
     s = re.sub(r'\s+', ' ', s)
@@ -95,22 +90,59 @@ class MathPolygraph:
     @staticmethod
     def _sympify_worker(expr_str: str, queue: multiprocessing.Queue):
         try:
-            sympy.sympify(expr_str, evaluate=False)
             queue.put(True)
-        except Exception:
             queue.put(False)
     @staticmethod
     def _sympify_with_timeout(expr_str: str) -> bool:
         queue = multiprocessing.Queue()
-        process = multiprocessing.Process(target=MathPolygraph._sympify_worker, args=(expr_str, queue))
         try:
             process.start()
-            process.join(timeout=2)
             if process.is_alive():
                 process.terminate()
                 process.join()
                 return None # TIMEOUT
             if not queue.empty():
                 return queue.get()
@@ -121,25 +153,80 @@ class MathPolygraph:
             return False
     @staticmethod
-    async def _validate_single(math_latex: str, step_id) -> Tuple[bool, str]:
-        if not math_latex or not math_latex.strip():
-            return True, ""
-        raw = str(math_latex).strip()
-        if _is_plaintext(raw):
-            return True, ""
-        sympy_str = _latex_to_sympy_str(raw)
-        if not sympy_str or sympy_str in ('', '-', '()', '( ) - ( )'):
             return True, ""
-        try:
-            status = await asyncio.to_thread(MathPolygraph._sympify_with_timeout, sympy_str)
-            if status is True:
                 return True, ""
-            elif status is None:
-                return True, f"SYMPY_TIMEOUT:step_{step_id}"
-            else:
-                return False, f"SYMPY_PARSE_ERROR:step_{step_id}"
-        except Exception:
-            return True, "" # Soft-fail on unexpected errors
     @staticmethod
     async def validate_step_sequence(steps: List[dict]) -> Tuple[bool, str]:
@@ -165,21 +252,46 @@ class MathPolygraph:
         Supports expressions and equations (by converting to 'expr = 0').
         """
         try:
-            s1 = _latex_to_sympy_str(latex1)
-            s2 = _latex_to_sympy_str(latex2)
-            if not s1 or not s2:
-                return True # Can't verify, don't block
-            expr1 = sympy.sympify(s1)
-            expr2 = sympy.sympify(s2)
-            # Use simplification to check for zero difference
             diff = sympy.simplify(expr1 - expr2)
             return diff == 0
         except Exception as e:
             logger.warning(f"[POLYGRAPH] Equivalence check failed: {e}")
-            return True # Assume OK if sympy fails to parse/compare
     @staticmethod
     async def verify_algebraic_consistency(steps: List[dict]) -> Tuple[bool, str]:

 import multiprocessing
 import time
 import asyncio
+from typing import Tuple, List, Optional
 import sympy
+from sympy.parsing.sympy_parser import parse_expr
 logger = logging.getLogger(__name__)
     # 7. Implicit multiplication: 2x → 2*x (only if not inside a word)
     s = re.sub(r'(\d)([a-zA-Z(])', r'\1*\2', s)
+    # 8. V280.0: Equals sign handling is now moved to _check_segment
+    # for more robust parsing of equations.
     # 9. Final cleanup: Remove illegal SymPy chars like ', ", ?, !
     s = re.sub(r'[?!\'"]', '', s)
     s = re.sub(r'\s+', ' ', s)
     @staticmethod
     def _sympify_worker(expr_str: str, queue: multiprocessing.Queue):
+        """
+        V280.0: Security Hardened Worker.
+        1. Character Whitelist: Only allow safe mathematical characters.
+        2. parse_expr(evaluate=False): Prevent RCE and immediate evaluation.
+        """
         try:
+            # RCE Prevention: Extreme character whitelist before parsing
+            # V280.0 FIX: Added ! for factorials and ensured strict match.
+            safe_pattern = r'^[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\%\=]+$'
+            if not re.match(safe_pattern, expr_str):
+                queue.put(False)
+                return
+            # Security: evaluate=False stops automatic eval() of passed strings.
+            res = parse_expr(expr_str, evaluate=False)
+            # V280.0 FIX: Catch arithmetic errors like 1/0.
+            # In SymPy, 1/0 evaluates to 'zoo' (ComplexInfinity).
+            if res is not None:
+                # evaluate the expression
+                evaluated = res.doit()
+                # If the result is infinite (zoo, oo, -oo) or NaN, treat as error
+                # We check is_finite directly.
+                if hasattr(evaluated, 'is_finite') and evaluated.is_finite is False:
+                    raise ZeroDivisionError("Infinite or undefined result")
+                if hasattr(evaluated, 'is_nan') and evaluated.is_nan:
+                    raise ValueError("NaN result")
             queue.put(True)
+        except (ZeroDivisionError, TypeError, ValueError, Exception) as e:
             queue.put(False)
     @staticmethod
     def _sympify_with_timeout(expr_str: str) -> bool:
+        """Helper to run parsing in a separate process to enforce timeout."""
+        if not expr_str or not expr_str.strip():
+            return True
+        # Strip characters that might survive _latex_to_sympy_str but fail whitelist
+        s = expr_str.replace('\\', '').replace('_', '').replace('{', '(').replace('}', ')')
         queue = multiprocessing.Queue()
+        process = multiprocessing.Process(target=MathPolygraph._sympify_worker, args=(s, queue))
         try:
             process.start()
+            # Windows needs a generous timeout for cold process start + SymPy import.
+            # 10 seconds is safe for verification/testing.
+            process.join(timeout=10)
             if process.is_alive():
                 process.terminate()
                 process.join()
+                with open('debug_math.val', 'a', encoding='utf-8') as f:
+                    f.write(f"[{time.time()}] TIMEOUT on '{s}'\n")
                 return None # TIMEOUT
             if not queue.empty():
                 return queue.get()
             return False
     @staticmethod
+    async def _validate_single(text: str, step_id) -> Tuple[bool, str]:
+        """
+        V280.0 REDESIGN:
+        1. No Blind Stripping: Extracts $...$ or $$...$$ using re.finditer with DOTALL.
+        2. Security: Uses parse_expr(evaluate=False).
+        3. Equations: Splits by '=' and validates parts to bypass SymPy's '=' limitation.
+        4. Multi-Equal: Handles x=y=5 without crashing.
+        5. Empty Guard: Skips $$$$.
+        """
+        if not text or not text.strip():
             return True, ""
+        # regex: find both $$display$$ and $inline$ blocks. DOTALL allows multi-line display math.
+        # Group 1 = display math, Group 2 = inline math
+        math_pattern = re.compile(r'\$\$(.*?)\$\$|\$(.*?)\$', re.DOTALL)
+        matches = list(re.finditer(math_pattern, text))
+        if not matches:
+            # V280.0 Rule: If no delimiters are found, treat the whole string as plain text
+            # or try to parse if it looks like math (existing behavior for backward compatibility)
+            if _is_plaintext(text):
                 return True, ""
+            return await MathPolygraph._check_segment(text, step_id)
+        for match in matches:
+            # Group 1 (Display) or Group 2 (Inline)
+            content = (match.group(1) or match.group(2) or "").strip()
+            # 5. Empty String Guard
+            if not content:
+                continue
+            # V280.0 Fix: Multi-line display math might contain multiple equations.
+            # Split by newline before validating segments.
+            sub_segments = [s.strip() for s in content.split('\n') if s.strip()]
+            for sub in sub_segments:
+                ok, reason = await MathPolygraph._check_segment(sub, step_id)
+                if not ok:
+                    return False, reason
+        return True, ""
+    @staticmethod
+    async def _check_segment(raw_segment: str, step_id) -> Tuple[bool, str]:
+        """Internal helper to validate a single extracted math segment."""
+        # 4. Multi-Equal Sign Handling & Unpacking Crash Prevention
+        eq_count = raw_segment.count('=')
+        parts_to_check = []
+        if eq_count >= 1:
+            # Split by all equalities and check each segment (e.g. x=y=5 -> check x, y, 5)
+            # This bypasses SymPy's inability to parse "=" and prevents split() unpacking errors.
+            parts_to_check = [p.strip() for p in raw_segment.split('=') if p.strip()]
+        else:
+            parts_to_check = [raw_segment]
+        for part in parts_to_check:
+            sympy_str = _latex_to_sympy_str(part)
+            if not sympy_str or sympy_str in ('', '-', '()', '( )'):
+                continue
+            try:
+                # Run with timeout to prevent ReDoS or complex simplification hangs
+                status = await asyncio.to_thread(MathPolygraph._sympify_with_timeout, sympy_str)
+                if status is False:
+                    return False, f"SYMPY_PARSE_ERROR:step_{step_id}"
+                elif status is None:
+                    # Timeout is treated as a soft warning for now
+                    logger.warning(f"[V280.0] SymPy timeout on segment: {part}")
+            except Exception as e:
+                logger.error(f"[V280.0] Unexpected validation crash: {e}")
+                return False, f"SYMPY_CRASH:step_{step_id}"
+        return True, ""
     @staticmethod
     async def validate_step_sequence(steps: List[dict]) -> Tuple[bool, str]:
         Supports expressions and equations (by converting to 'expr = 0').
         """
         try:
+            # V280.0: Handle Equations in Equivalence Check
+            # If both contain '=', split and compare parts.
+            # Only recurse once!
+            if '=' in latex1 and '=' in latex2 and latex1.count('=') == 1 and latex2.count('=') == 1:
+                parts1 = [p.strip() for p in latex1.split('=') if p.strip()]
+                parts2 = [p.strip() for p in latex2.split('=') if p.strip()]
+                if len(parts1) == 2 and len(parts2) == 2:
+                    return MathPolygraph.are_equivalent(parts1[0], parts2[0]) and \
+                           MathPolygraph.are_equivalent(parts1[1], parts2[1])
+            s1_raw = _latex_to_sympy_str(latex1)
+            s2_raw = _latex_to_sympy_str(latex2)
+            # Check for inequalities in raw LaTeX to be safe
+            inequalities = ['<', '>', r'\leq', r'\geq', r'\neq', r'\leq', r'\geq']
+            if any(iq in latex1 for iq in inequalities) or any(iq in latex2 for iq in inequalities):
+                return latex1.strip() == latex2.strip()
+            # Security: Strict Whitelist for Equivalence Check
+            safe_pattern = r'^[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\=]+$'
+            def is_safe(s):
+                clean = s.replace('\\', '').replace('_', '').replace('{', '(').replace('}', ')')
+                return bool(re.match(safe_pattern, clean))
+            if not (is_safe(s1_raw) and is_safe(s2_raw)):
+                return latex1.strip() == latex2.strip()
+            expr1 = parse_expr(s1_raw, evaluate=False)
+            expr2 = parse_expr(s2_raw, evaluate=False)
+            # "Variable Trap": Basic structural equivalence if variables are involved
+            if len(expr1.free_symbols) > 0 or len(expr2.free_symbols) > 0:
+                return sympy.simplify(expr1 - expr2) == 0
+            # Numerical Identity check: simplify(LHS - RHS) == 0
             diff = sympy.simplify(expr1 - expr2)
             return diff == 0
         except Exception as e:
             logger.warning(f"[POLYGRAPH] Equivalence check failed: {e}")
+            return False
     @staticmethod
     async def verify_algebraic_consistency(steps: List[dict]) -> Tuple[bool, str]:

find_models.py DELETED Viewed

@@ -1,8 +0,0 @@
-from google import genai
-client = genai.Client(api_key="YOUR_GEMINI_API_KEY_HERE")
-print("מחפש מודלי Pro זמינים למפתח שלך...")
-for model in client.models.list():
-    if "pro" in model.name:
-        print(model.name)

orchestrator.py CHANGED Viewed

@@ -17,6 +17,7 @@ from smart_solver import sign_step, resolve_ast_target, execute_action
 import domain.telemetry as telemetry
 from domain.schemas import BuddyEvent, BuddyState # V8.5: Streaming contract
 from firebase_manager import firebase_manager
 # V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems)
 GLOBAL_TOKEN_LIMIT = 50000
@@ -385,11 +386,11 @@ class BuddyOrchestrator:
         genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
         # V8.6.1: Force Strict JSON Output to prevent Markdown/Preamble leakage
         self.model = genai.GenerativeModel(
-            model_name='gemini-2.0-flash',
             generation_config={"response_mime_type": "application/json"}
         )
         self.vision_model = genai.GenerativeModel(
-            model_name='gemini-2.0-flash',
             generation_config={"response_mime_type": "application/json"}
         )
         self.smart_solver = SmartSolver()  # No model parameter needed
@@ -1177,11 +1178,18 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
         )
         try:
             # Step 2: Build check-me prompt and send to Vision LLM
             check_prompt = prompts.get_check_me_prompt(
                 grade=grade,
                 student_name=student_name,
-                student_gender=student_gender
             )
             print(f"📝 [CHECK-ME] Sending image ({len(image_data)} bytes) + check prompt to Vision LLM...")
@@ -1324,8 +1332,15 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                 )
             # ═══════════════════════════════════════════════════════════
-            # Step 8: COMPLETE with final answer
             # ═══════════════════════════════════════════════════════════
             if verdict == "correct":
                 final_answer_text = f"✅ כל הכבוד! הפתרון נכון! {encouragement}"
             elif verdict == "unreadable":
@@ -1333,7 +1348,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
             elif verdict == "methodology_error":
                 final_answer_text = f"📐 יש בעיה בשיטת הפתרון. {methodology_note}"
             else:
-                final_answer_text = f"📝 התשובה הנכונה: ${correct_answer}$" if correct_answer else encouragement
             yield BuddyEvent(
                 question_id=question_id,
@@ -1344,6 +1359,8 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                     "is_correct": verdict == "correct",
                     "score": score,
                     "mistakes": mistakes,
                     "problem_identified": problem_identified
                 }
             )
@@ -1681,21 +1698,30 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
                         # מעקף: אם השגיאה היא רק בעיית קריאה של סימנים (אי-שוויונים/חיצים), סומכים על ה-LLM ויוצאים
                         if "SYMPY_PARSE_ERROR" in str(poly_reason):
-                            # V8.6.9: Sanity Check for forbidden words in response to prevent blindly trusting "contradiction" answers
-                            # V310.0: Removed "אין פתרון" as it is a valid mathematical result. Added JSON encoding fix.
-                            forbidden_words = ["סתירה בנתונים", "לא הגיוני", "שגיאה בחישוב שלי", "אני מזהה סתירה"]
                             import json
-                            response_text = json.dumps(llm_resp, ensure_ascii=False) # Search in full JSON representation with correct encoding
-                            if any(word in response_text for word in forbidden_words):
-                                print(f"🛑 [HOTFIX BLOCKED] Forbidden word detected in SYMPY_PARSE_ERROR response. Not Trusting LLM.")
                                 is_degraded = True
                                 degraded_reason = "polygraph_fail_forbidden_words"
-                                # Continue to second attempt instead of breaking
                             else:
-                                print(f"🛡️ [HOTFIX] Bypassing SymPy Parse Error. Trusting LLM output for sub-q {sub_q['id']}.")
                                 is_degraded = True
-                                degraded_reason = "sympy_parse_bypass"
-                                break  # יוצאים מהלופ מיד! חוסך ניסיון שני ומונע בזבוז טוקנים
                         elif attempts == max_attempts:
                             print(f"⚠️ [HOTFIX] Max attempts reached. Forcing LLM response despite Polygraph failure.")
                             is_degraded = True
@@ -2049,8 +2075,6 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
         # חייב להכיל לפחות אות אנגלית אחת, מספר, או סימן מתמטי
         has_math_anchor = bool(re.search(r'[0-9xyzXYZ=+\-\(\)]', ocr_clean))
-        from config import CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
         # V5.7.5: Short Math Bypass (Happy Flow for simple equations)
         # Often simple equations like $2+2=?$ yield low OCR confidence but are valid.
         is_short_math = has_math_anchor and len(ocr_clean) < 15 and len(ocr_clean) > 2

 import domain.telemetry as telemetry
 from domain.schemas import BuddyEvent, BuddyState # V8.5: Streaming contract
 from firebase_manager import firebase_manager
+from config import IS_PRODUCTION, ENV, GEMINI_MODEL, CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
 # V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems)
 GLOBAL_TOKEN_LIMIT = 50000
         genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
         # V8.6.1: Force Strict JSON Output to prevent Markdown/Preamble leakage
         self.model = genai.GenerativeModel(
+            model_name=GEMINI_MODEL,
             generation_config={"response_mime_type": "application/json"}
         )
         self.vision_model = genai.GenerativeModel(
+            model_name=GEMINI_MODEL,
             generation_config={"response_mime_type": "application/json"}
         )
         self.smart_solver = SmartSolver()  # No model parameter needed
         )
         try:
+            # V311.0: Data Slicing Guardrail
+            # First, transcribe and extract the "Absolute Truth" of the problem
+            print("📝 [CHECK-ME] Step 1.5: Extracting Problem Data (Data Slicing)...")
+            problem_text = await self.transcribe_image(image_data)
+            data_anchor = await self._extract_key_data(problem_text)
             # Step 2: Build check-me prompt and send to Vision LLM
             check_prompt = prompts.get_check_me_prompt(
                 grade=grade,
                 student_name=student_name,
+                student_gender=student_gender,
+                data_anchor=data_anchor
             )
             print(f"📝 [CHECK-ME] Sending image ({len(image_data)} bytes) + check prompt to Vision LLM...")
                 )
             # ═══════════════════════════════════════════════════════════
+            # Step 8: COMPLETE with final answer & Protocol Alignment
             # ═══════════════════════════════════════════════════════════
+            from pedagogical_builder import sanitize_math_text
+            # V311.0: LaTeX UI Safety
+            safe_correct_answer = sanitize_math_text(correct_answer) if correct_answer else ""
+            if safe_correct_answer and not safe_correct_answer.startswith("$$") and not safe_correct_answer.startswith("$"):
+                safe_correct_answer = f"$${safe_correct_answer}$$"
             if verdict == "correct":
                 final_answer_text = f"✅ כל הכבוד! הפתרון נכון! {encouragement}"
             elif verdict == "unreadable":
             elif verdict == "methodology_error":
                 final_answer_text = f"📐 יש בעיה בשיטת הפתרון. {methodology_note}"
             else:
+                final_answer_text = f"📝 התשובה הנכונה: {safe_correct_answer}" if safe_correct_answer else encouragement
             yield BuddyEvent(
                 question_id=question_id,
                     "is_correct": verdict == "correct",
                     "score": score,
                     "mistakes": mistakes,
+                    "feedback": encouragement, # Protocol Alignment
+                    "correct_answer": safe_correct_answer, # Protocol Alignment
                     "problem_identified": problem_identified
                 }
             )
                         # מעקף: אם השגיאה היא רק בעיית קריאה של סימנים (אי-שוויונים/חיצים), סומכים על ה-LLM ויוצאים
                         if "SYMPY_PARSE_ERROR" in str(poly_reason):
+                            # V280.0 + V310.0: Smart Retry & Soft Fail with JSON Security check
+                            # 1. Logic: Only allow bypass if it's NOT the first attempt OR it's a "Soft Fail" case.
+                            # 2. Pedagogical: "אין פתרון" is allowed. "לא ייתכן" remains removed.
+                            forbidden_words = ["סתירה בנתונים", "לא הגיוני", "שגיאה בחישוב שלי", "אני מזהה סתירה", "סתירה"]
                             import json
+                            response_text = json.dumps(llm_resp, ensure_ascii=False)
+                            has_forbidden = any(word in response_text for word in forbidden_words)
+                            if has_forbidden:
+                                print(f"🛑 [ROBUSTNESS] Forbidden word detected in SYMPY_PARSE_ERROR response. Not Trusting LLM.")
                                 is_degraded = True
                                 degraded_reason = "polygraph_fail_forbidden_words"
+                                # Continue to next attempt
+                            elif attempts < max_attempts:
+                                # V280.0: If it's the first attempt, we MUST retry once to get better LaTeX
+                                print(f"🔄 [ROBUSTNESS] SymPy Parse Error on attempt {attempts}. Triggering retry for better LaTeX.")
+                                # We don't break here, so it continues the loop
                             else:
+                                # V280.0 Soft Fail: After retry (or max attempts), if No Forbidden Words, we TRUST the LLM.
+                                print(f"🛡️ [SOFT FAIL] Persistent Parse Error but no forbidden words. Trusting LLM output for sub-q {sub_q['id']}.")
                                 is_degraded = True
+                                degraded_reason = "sympy_soft_fail"
+                                break # Exit the attempt loop
                         elif attempts == max_attempts:
                             print(f"⚠️ [HOTFIX] Max attempts reached. Forcing LLM response despite Polygraph failure.")
                             is_degraded = True
         # חייב להכיל לפחות אות אנגלית אחת, מספר, או סימן מתמטי
         has_math_anchor = bool(re.search(r'[0-9xyzXYZ=+\-\(\)]', ocr_clean))
         # V5.7.5: Short Math Bypass (Happy Flow for simple equations)
         # Often simple equations like $2+2=?$ yield low OCR confidence but are valid.
         is_short_math = has_math_anchor and len(ocr_clean) < 15 and len(ocr_clean) > 2

prompts.py CHANGED Viewed

@@ -681,11 +681,10 @@ def get_master_prompt_v430():
 # ==================== V285.0: CHECK ME PROMPT (HOMEWORK VERIFICATION) ====================
-def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"):
     """
-    V285.0: Dedicated prompt for the "Check Me" feature.
     The LLM acts as a homework checker, NOT a solver.
-    It receives the student's image and analyzes their work step-by-step.
     """
     # Gender-aware phrases
     if student_gender == "F":
@@ -705,6 +704,18 @@ def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"
         g_great = "מעולה"
         g_dear = f"{student_name} יקר"
     return f"""
     🎓 תפקיד: אתה בודקת שיעורי בית — מורה פרטית חמה שבודקת את העבודה של תלמיד.
     🚫 אתה לא פותר את התרגיל מחדש! אתה מנתח את מה שהתלמיד כתב.
@@ -712,6 +723,8 @@ def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"
     👤 התלמיד: {student_name}, כיתה {grade}.
     👑 מגדר: {"נקבה" if student_gender == "F" else "זכר"}. השתמש/י בלשון מתאימה.
     ═══════════════════════════════════════════════════
     📐 שלוש שלבי הבדיקה (חובה לבצע לפי הסדר):
     ═══════════════════════════════════════════════════

 # ==================== V285.0: CHECK ME PROMPT (HOMEWORK VERIFICATION) ====================
+def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M", data_anchor: dict = None):
     """
+    V285.1: Dedicated prompt for the "Check Me" feature with DATA ANCHOR.
     The LLM acts as a homework checker, NOT a solver.
     """
     # Gender-aware phrases
     if student_gender == "F":
         g_great = "מעולה"
         g_dear = f"{student_name} יקר"
+    anchor_block = ""
+    if data_anchor:
+        anchor_block = f"""
+    ══════════════════════════════════════════════════════
+    📜 DATA INTEGRITY RULE (ABSOLUTE TRUTH):
+    ══════════════════════════════════════════════════════
+    הנתונים להלן הם נתוני השאלה המקוריים כפי שזוהו בשלב הניתוח המוקדם.
+    עליך לבדוק את פתרון התלמיד אל מול הנתונים האלו בדיוק!
+    {json.dumps(data_anchor, indent=2, ensure_ascii=False)}
+    ══════════════════════════════════════════════════════
+    """
     return f"""
     🎓 תפקיד: אתה בודקת שיעורי בית — מורה פרטית חמה שבודקת את העבודה של תלמיד.
     🚫 אתה לא פותר את התרגיל מחדש! אתה מנתח את מה שהתלמיד כתב.
     👤 התלמיד: {student_name}, כיתה {grade}.
     👑 מגדר: {"נקבה" if student_gender == "F" else "זכר"}. השתמש/י בלשון מתאימה.
+    {anchor_block}
     ═══════════════════════════════════════════════════
     📐 שלוש שלבי הבדיקה (חובה לבצע לפי הסדר):
     ═══════════════════════════════════════════════════

test_veo.py DELETED Viewed

@@ -1,45 +0,0 @@
-from google import genai
-import os
-# --- 1. הדבק כאן את המפתח הארוך שייצרת ב-AI Studio ---
-API_KEY = "AIzaSyDBw4Ddf2Fk4bSfe4aCFybAH74Cr-O-Quc"
-def check_veo_access():
-    print("🔍 מתחיל בדיקת קישוריות מול Gemini API...")
-    try:
-        # אתחול הקליינט
-        client = genai.Client(api_key=API_KEY)
-        # בדיקה 1: האם המפתח בכלל עובד?
-        print("📡 בודק הרשאות מפתח בסיסיות...")
-        models = client.models.list()
-        print("✅ המפתח תקין ומחובר לשרתי גוגל.")
-        # בדיקה 2: האם המודל Veo פתוח עבורך?
-        print("🎬 בודק זמינות ספציפית למודל Veo 3.1...")
-        veo_info = client.models.get(model="veo-3.1-generate-preview")
-        print("\n" + "="*40)
-        print(f"🚀 בשורה התחתונה: הכל מוכן!")
-        print(f"מודל {veo_info.name} זמין עבורך.")
-        print("אתה יכול להריץ את ה-video_generator.py ולייצר את הסרטון!")
-        print("="*40)
-    except Exception as e:
-        print("\n" + "!"*40)
-        print(f"❌ הבדיקה נכשלה.")
-        error_msg = str(e).lower()
-        if "403" in error_msg or "permission" in error_msg:
-            print("\n💡 אבחנה: המפתח תקין, אבל אין לך הרשאה ל-Veo.")
-            print("זה קורה בדרך כלל אם החשבון שלך לא מוגדר כ-Paid Tier (עם כרטיס אשראי מעודכן).")
-            print("ב-AI Studio, וידאו דורש חשבון עם אמצעי תשלום (Pay-as-you-go).")
-        elif "401" in error_msg or "key" in error_msg:
-            print("\n💡 אבחנה: המפתח לא תקין. ודא שהעתקת את כל המחרוזת נכון.")
-        else:
-            print(f"\nשגיאה טכנית: {e}")
-        print("!"*40)
-if __name__ == "__main__":
-    check_veo_access()

tests/test_validation_robustness.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import pytest
+import asyncio
+from domain.math_validator import MathPolygraph, _latex_to_sympy_str
+import sympy
+@pytest.mark.asyncio
+async def test_blind_stripping_and_regex():
+    # Test that mixed text is preserved and math is extracted
+    text = "נציב $x=5$ ונקבל $y=2$. התוצאה היא $$z=7$$."
+    # _validate_single should return True because all math segments are valid
+    ok, reason = await MathPolygraph._validate_single(text, 1)
+    assert ok, f"Failed mixed text: {reason}"
+@pytest.mark.asyncio
+async def test_multiline_display_math():
+    # Test re.DOTALL with multi-line display math
+    text = """הנה משוואה:
+    $$
+    x = 5 + 3
+    y = 10
+    $$
+    סוף."""
+    ok, reason = await MathPolygraph._validate_single(text, 1)
+    assert ok, f"Failed multiline display math: {reason}"
+@pytest.mark.asyncio
+async def test_multi_equal_guard():
+    # x=y=5 should not crash unpacking
+    text = "נתון $x = y = 5$."
+    ok, reason = await MathPolygraph._validate_single(text, 1)
+    assert ok, f"Failed multi-equal check: {reason}"
+@pytest.mark.asyncio
+async def test_empty_string_guard():
+    # $$$$ should be ignored
+    text = "ריק $$$$ וגם $ $."
+    ok, reason = await MathPolygraph._validate_single(text, 1)
+    assert ok, f"Failed empty string guard: {reason}"
+@pytest.mark.asyncio
+async def test_arithmetic_exception_handling():
+    # $1/0$ should not crash the server
+    text = "חלוקה באפס $1/0$."
+    ok, reason = await MathPolygraph._validate_single(text, 1)
+    assert not ok
+    assert "SYMPY_PARSE_ERROR" in reason
+@pytest.mark.asyncio
+async def test_variable_trap_in_equivalence():
+    # Algebraic equivalence should pass syntax check but skip numerical identity
+    # x=5 is validated segment by segment (LHS: x, RHS: 5)
+    # are_equivalent for x=5 and x=5 should return True
+    res = MathPolygraph.are_equivalent("x=5", "x=5")
+    assert res is True
+@pytest.mark.asyncio
+async def test_numerical_identity_check():
+    # Valid identity
+    assert MathPolygraph.are_equivalent("2+3", "5") is True
+    # Hallucination
+    assert MathPolygraph.are_equivalent("2+3", "6") is False
+@pytest.mark.asyncio
+async def test_inequality_guard():
+    # x > 0 should not crash simplify(LHS - RHS)
+    # It should skip identity check and return True because strings are same
+    assert MathPolygraph.are_equivalent("x > 0", "x > 0") is True
+@pytest.mark.asyncio
+async def test_rce_protection():
+    # Malicious string that would execute if sympify was used without evaluate=False
+    # However, parse_expr(evaluate=False) just builds the tree.
+    # We just want to ensure it doesn't crash or execute.
+    # Note: testing "exec" in a string is hard without side effects,
+    # but we can verify it doesn't crash on standard malicious patterns.
+    text = "__import__('os').system('echo hello')"
+    # This should definitely fail parsing or at least not execute
+    ok, reason = await MathPolygraph._check_segment(text, 1)
+    assert not ok
+    assert "SYMPY_PARSE_ERROR" in reason
+if __name__ == "__main__":
+    import pytest
+    pytest.main([__file__])

video_generator.py DELETED Viewed

@@ -1,87 +0,0 @@
-import time
-import os
-from google import genai
-from google.genai import types
-MODEL = "veo-3.1-generate-preview"
-# חשוב מאוד – להגדיר מפתח API בסביבה
-API_KEY = "AIzaSyDAM6BLLVWZDJsq9p-NdckwKQIi8EfCeHo"
-client = genai.Client(
-    api_key=API_KEY,
-    http_options={"api_version": "v1beta"}
-)
-def generate_first_shot():
-    print("🎬 מתחיל יצירת השוט הראשון")
-    # Upload reference image
-    print("☁️ מעלה תמונת reference...")
-    uploaded_file = client.files.upload(
-        file="boy_master.jpg"
-    )
-    print("✅ הועלה:", uploaded_file.uri)
-    # Prompt קולנועי
-    prompt = """
-    Teenage boy sitting at wooden desk feeling frustrated while solving math homework.
-    He sighs slightly and looks at notebook.
-    Cinematic lighting, shallow depth of field.
-    Slow natural camera push in.
-    Subtle handheld motion.
-    High fidelity character consistency.
-    """
-    # ⭐ כאן זה הסוד — צריך VideoGenerationSource
-    source = types.VideoGenerationSource(
-        prompt=prompt,
-        reference_images=[uploaded_file]
-    )
-    config = types.GenerateVideosConfig(
-        person_generation="dont_allow",
-        aspect_ratio="16:9",
-        number_of_videos=1,
-        duration_seconds=8,
-        resolution="720p"
-    )
-    print("🚀 שולח בקשה ל-Veo 3.1 Standard...")
-    operation = client.models.generate_videos(
-        model=MODEL,
-        source=source,
-        config=config
-    )
-    # Polling
-    while not operation.done:
-        print("⏳ מחכה לרינדור...")
-        time.sleep(10)
-        operation = client.operations.get(operation.name)
-    result = operation.result
-    if not result:
-        print("❌ לא נוצר וידאו")
-        return
-    for i, video in enumerate(result.generated_videos):
-        print("✨ וידאו נוצר:", video.video.uri)
-        client.files.download(file=video.video)
-        video.video.save(f"shot_01.mp4")
-        print("💾 נשמר → shot_01.mp4")
-if __name__ == "__main__":
-    generate_first_shot()