V280.0: Robust Math Validation Fixes & Model Lockdown (RCE Protection, Multiline Regex, Soft Fail)
Browse files- config.py +5 -0
- deploy_hf +1 -1
- domain/math_validator.py +152 -40
- find_models.py +0 -8
- orchestrator.py +41 -17
- prompts.py +16 -3
- test_veo.py +0 -45
- tests/test_validation_robustness.py +84 -0
- video_generator.py +0 -87
config.py
CHANGED
|
@@ -54,3 +54,8 @@ CONFIDENCE_THRESHOLD_MEDIUM = 0.55 if IS_PRODUCTION else 0.01
|
|
| 54 |
print(f"[CONFIG] Loading {ENV.upper()} configuration.")
|
| 55 |
print(f"[CONFIG] Project: {PROJECT_ID}")
|
| 56 |
print(f"[CONFIG] Bucket: {STORAGE_BUCKET}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
print(f"[CONFIG] Loading {ENV.upper()} configuration.")
|
| 55 |
print(f"[CONFIG] Project: {PROJECT_ID}")
|
| 56 |
print(f"[CONFIG] Bucket: {STORAGE_BUCKET}")
|
| 57 |
+
|
| 58 |
+
# V3.1.3: Model Hardening - defaults to gemini-2.0-flash (overridable via GEMINI_MODEL)
|
| 59 |
+
# To change model, update the environment variable 'GEMINI_MODEL'
|
| 60 |
+
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.0-flash")
|
| 61 |
+
print(f"[CONFIG] Active Model: {GEMINI_MODEL}")
|
deploy_hf
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit 080bbe367f70411fa816f96e2a9ea47a63b728b2
|
domain/math_validator.py
CHANGED
|
@@ -4,8 +4,9 @@ import logging
|
|
| 4 |
import multiprocessing
|
| 5 |
import time
|
| 6 |
import asyncio
|
| 7 |
-
from typing import Tuple, List
|
| 8 |
import sympy
|
|
|
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
|
@@ -69,15 +70,9 @@ def _latex_to_sympy_str(latex_str: str) -> str:
|
|
| 69 |
# 7. Implicit multiplication: 2x → 2*x (only if not inside a word)
|
| 70 |
s = re.sub(r'(\d)([a-zA-Z(])', r'\1*\2', s)
|
| 71 |
|
| 72 |
-
# 8.
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
# V310.0: If one side is empty, ignore the '='
|
| 76 |
-
if parts[0].strip() and parts[1].strip():
|
| 77 |
-
s = f"({parts[0]}) - ({parts[1]})"
|
| 78 |
-
else:
|
| 79 |
-
s = parts[0] if parts[0].strip() else parts[1]
|
| 80 |
-
|
| 81 |
# 9. Final cleanup: Remove illegal SymPy chars like ', ", ?, !
|
| 82 |
s = re.sub(r'[?!\'"]', '', s)
|
| 83 |
s = re.sub(r'\s+', ' ', s)
|
|
@@ -95,22 +90,59 @@ class MathPolygraph:
|
|
| 95 |
|
| 96 |
@staticmethod
|
| 97 |
def _sympify_worker(expr_str: str, queue: multiprocessing.Queue):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
try:
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
queue.put(True)
|
| 101 |
-
except Exception:
|
| 102 |
queue.put(False)
|
| 103 |
|
| 104 |
@staticmethod
|
| 105 |
def _sympify_with_timeout(expr_str: str) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
queue = multiprocessing.Queue()
|
| 107 |
-
process = multiprocessing.Process(target=MathPolygraph._sympify_worker, args=(
|
| 108 |
try:
|
| 109 |
process.start()
|
| 110 |
-
process.
|
|
|
|
|
|
|
| 111 |
if process.is_alive():
|
| 112 |
process.terminate()
|
| 113 |
process.join()
|
|
|
|
|
|
|
| 114 |
return None # TIMEOUT
|
| 115 |
if not queue.empty():
|
| 116 |
return queue.get()
|
|
@@ -121,25 +153,80 @@ class MathPolygraph:
|
|
| 121 |
return False
|
| 122 |
|
| 123 |
@staticmethod
|
| 124 |
-
async def _validate_single(
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
return True, ""
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
return True, ""
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
@staticmethod
|
| 145 |
async def validate_step_sequence(steps: List[dict]) -> Tuple[bool, str]:
|
|
@@ -165,21 +252,46 @@ class MathPolygraph:
|
|
| 165 |
Supports expressions and equations (by converting to 'expr = 0').
|
| 166 |
"""
|
| 167 |
try:
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
if
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
diff = sympy.simplify(expr1 - expr2)
|
| 179 |
return diff == 0
|
| 180 |
except Exception as e:
|
| 181 |
logger.warning(f"[POLYGRAPH] Equivalence check failed: {e}")
|
| 182 |
-
return
|
| 183 |
|
| 184 |
@staticmethod
|
| 185 |
async def verify_algebraic_consistency(steps: List[dict]) -> Tuple[bool, str]:
|
|
|
|
| 4 |
import multiprocessing
|
| 5 |
import time
|
| 6 |
import asyncio
|
| 7 |
+
from typing import Tuple, List, Optional
|
| 8 |
import sympy
|
| 9 |
+
from sympy.parsing.sympy_parser import parse_expr
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
|
|
|
| 70 |
# 7. Implicit multiplication: 2x → 2*x (only if not inside a word)
|
| 71 |
s = re.sub(r'(\d)([a-zA-Z(])', r'\1*\2', s)
|
| 72 |
|
| 73 |
+
# 8. V280.0: Equals sign handling is now moved to _check_segment
|
| 74 |
+
# for more robust parsing of equations.
|
| 75 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# 9. Final cleanup: Remove illegal SymPy chars like ', ", ?, !
|
| 77 |
s = re.sub(r'[?!\'"]', '', s)
|
| 78 |
s = re.sub(r'\s+', ' ', s)
|
|
|
|
| 90 |
|
| 91 |
@staticmethod
|
| 92 |
def _sympify_worker(expr_str: str, queue: multiprocessing.Queue):
    """
    Parse and sanity-check one expression, reporting the verdict on ``queue``.

    Exactly one boolean is put on ``queue``:
      * True  - the string passed the character whitelist, parsed, and its
                evaluated form is neither infinite nor NaN.
      * False - anything else (rejected characters, parse failure,
                division by zero, NaN, or any unexpected exception).

    Security note: ``parse_expr`` builds its result by calling ``eval`` on
    the transformed source. ``evaluate=False`` only disables SymPy's automatic
    simplification; it is NOT a sandbox. The character whitelist below is the
    actual guard against code injection and must stay strict.

    Args:
        expr_str: SymPy-style expression text.
        queue: Channel used to hand the boolean verdict back to the caller;
            only its ``put`` method is used here.
    """
    try:
        # Whitelist: letters, digits, whitespace and basic math punctuation.
        # Quotes, underscores, brackets, semicolons etc. are all rejected, so
        # dunder access and string literals never reach the parser.
        safe_pattern = r'[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\%\=]+'
        if not re.fullmatch(safe_pattern, expr_str):
            queue.put(False)
            return

        # evaluate=False keeps the tree unevaluated so that problems such as
        # 1/0 surface in the explicit doit() below instead of during parsing.
        res = parse_expr(expr_str, evaluate=False)

        if res is not None:
            evaluated = res.doit()

            # zoo / oo / -oo report is_finite == False (e.g. 1/0 -> zoo).
            if getattr(evaluated, 'is_finite', None) is False:
                raise ZeroDivisionError("Infinite or undefined result")

            # NaN reports is_finite == None and SymPy has no `is_nan`
            # attribute, so NaN (e.g. 0/0) must be detected structurally.
            contains = getattr(evaluated, 'has', None)
            if callable(contains) and contains(sympy.nan):
                raise ValueError("NaN result")

        queue.put(True)
    except Exception:
        # Any failure (bad input type, parse error, arithmetic error) is
        # reported as "invalid" instead of crashing the worker process.
        queue.put(False)
|
| 124 |
|
| 125 |
@staticmethod
|
| 126 |
def _sympify_with_timeout(expr_str: str) -> bool:
|
| 127 |
+
"""Helper to run parsing in a separate process to enforce timeout."""
|
| 128 |
+
if not expr_str or not expr_str.strip():
|
| 129 |
+
return True
|
| 130 |
+
|
| 131 |
+
# Strip characters that might survive _latex_to_sympy_str but fail whitelist
|
| 132 |
+
s = expr_str.replace('\\', '').replace('_', '').replace('{', '(').replace('}', ')')
|
| 133 |
+
|
| 134 |
queue = multiprocessing.Queue()
|
| 135 |
+
process = multiprocessing.Process(target=MathPolygraph._sympify_worker, args=(s, queue))
|
| 136 |
try:
|
| 137 |
process.start()
|
| 138 |
+
# Windows needs a generous timeout for cold process start + SymPy import.
|
| 139 |
+
# 10 seconds is safe for verification/testing.
|
| 140 |
+
process.join(timeout=10)
|
| 141 |
if process.is_alive():
|
| 142 |
process.terminate()
|
| 143 |
process.join()
|
| 144 |
+
with open('debug_math.val', 'a', encoding='utf-8') as f:
|
| 145 |
+
f.write(f"[{time.time()}] TIMEOUT on '{s}'\n")
|
| 146 |
return None # TIMEOUT
|
| 147 |
if not queue.empty():
|
| 148 |
return queue.get()
|
|
|
|
| 153 |
return False
|
| 154 |
|
| 155 |
@staticmethod
|
| 156 |
+
async def _validate_single(text: str, step_id) -> Tuple[bool, str]:
    """
    Validate every math span found in ``text``.

    Spans are located with a single regex that recognises ``$$...$$``
    (display) and ``$...$`` (inline) blocks; DOTALL lets a display block run
    across several lines. Each non-empty line of each non-empty span is
    handed to ``MathPolygraph._check_segment``; the first failure is returned
    unchanged.

    When no delimiters are present, the text is accepted if
    ``_is_plaintext`` says so, otherwise the whole string is checked as one
    segment.

    Args:
        text: Step text that may mix prose with delimited math.
        step_id: Identifier forwarded to ``_check_segment`` for error codes.

    Returns:
        ``(True, "")`` when nothing fails (including empty/blank input and
        empty spans such as ``$$$$``), otherwise the ``(False, reason)``
        produced by ``_check_segment``.
    """
    if not (text and text.strip()):
        return True, ""

    # Alternative 1 captures display math, alternative 2 captures inline math.
    delimited = re.compile(r'\$\$(.*?)\$\$|\$(.*?)\$', re.DOTALL)
    found = [hit for hit in delimited.finditer(text)]

    if not found:
        # No delimiters: plain prose passes, anything else is checked whole.
        if _is_plaintext(text):
            return True, ""
        return await MathPolygraph._check_segment(text, step_id)

    for hit in found:
        display_body, inline_body = hit.group(1), hit.group(2)
        body = (display_body or inline_body or "").strip()

        # Empty spans such as "$$$$" or "$ $" carry nothing to validate.
        if not body:
            continue

        # A multi-line display block may hold one equation per line.
        for raw_line in body.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            ok, reason = await MathPolygraph._check_segment(line, step_id)
            if not ok:
                return False, reason

    return True, ""
|
| 197 |
+
|
| 198 |
+
@staticmethod
|
| 199 |
+
async def _check_segment(raw_segment: str, step_id) -> Tuple[bool, str]:
    """
    Validate one extracted math segment.

    A segment containing ``=`` is split on every ``=`` and each non-empty
    side is checked on its own (``x=y=5`` -> ``x``, ``y``, ``5``); a segment
    without ``=`` is checked as-is. Every piece is converted with
    ``_latex_to_sympy_str`` and, unless the result is empty or trivial,
    parsed via ``MathPolygraph._sympify_with_timeout`` on a worker thread.

    Args:
        raw_segment: Math text for a single expression or equation.
        step_id: Identifier embedded in the returned error code.

    Returns:
        ``(True, "")`` if no piece fails; ``(False, "SYMPY_PARSE_ERROR:step_<id>")``
        when a piece is reported invalid; ``(False, "SYMPY_CRASH:step_<id>")``
        when the check itself raises. A timeout (``None`` status) is only
        logged as a warning and does not fail the segment.
    """
    if '=' in raw_segment:
        # Splitting on every '=' avoids two-way unpacking and keeps the
        # parser from ever seeing an '=' sign.
        stripped_sides = (side.strip() for side in raw_segment.split('='))
        pieces = [side for side in stripped_sides if side]
    else:
        pieces = [raw_segment]

    for part in pieces:
        sympy_str = _latex_to_sympy_str(part)

        # Nothing meaningful left after conversion: skip this piece.
        if not sympy_str or sympy_str in ('', '-', '()', '( )'):
            continue

        try:
            # The blocking, process-backed check runs off the event loop.
            status = await asyncio.to_thread(MathPolygraph._sympify_with_timeout, sympy_str)
            if status is None:
                # Timeout is treated as a soft warning for now.
                logger.warning(f"[V280.0] SymPy timeout on segment: {part}")
            elif status is False:
                return False, f"SYMPY_PARSE_ERROR:step_{step_id}"
        except Exception as e:
            logger.error(f"[V280.0] Unexpected validation crash: {e}")
            return False, f"SYMPY_CRASH:step_{step_id}"

    return True, ""
|
| 230 |
|
| 231 |
@staticmethod
|
| 232 |
async def validate_step_sequence(steps: List[dict]) -> Tuple[bool, str]:
|
|
|
|
| 252 |
Supports expressions and equations (by converting to 'expr = 0').
|
| 253 |
"""
|
| 254 |
try:
|
| 255 |
+
# V280.0: Handle Equations in Equivalence Check
|
| 256 |
+
# If both contain '=', split and compare parts.
|
| 257 |
+
# Only recurse once!
|
| 258 |
+
if '=' in latex1 and '=' in latex2 and latex1.count('=') == 1 and latex2.count('=') == 1:
|
| 259 |
+
parts1 = [p.strip() for p in latex1.split('=') if p.strip()]
|
| 260 |
+
parts2 = [p.strip() for p in latex2.split('=') if p.strip()]
|
| 261 |
+
if len(parts1) == 2 and len(parts2) == 2:
|
| 262 |
+
return MathPolygraph.are_equivalent(parts1[0], parts2[0]) and \
|
| 263 |
+
MathPolygraph.are_equivalent(parts1[1], parts2[1])
|
| 264 |
+
|
| 265 |
+
s1_raw = _latex_to_sympy_str(latex1)
|
| 266 |
+
s2_raw = _latex_to_sympy_str(latex2)
|
| 267 |
|
| 268 |
+
# Check for inequalities in raw LaTeX to be safe
|
| 269 |
+
inequalities = ['<', '>', r'\leq', r'\geq', r'\neq', r'\leq', r'\geq']
|
| 270 |
+
if any(iq in latex1 for iq in inequalities) or any(iq in latex2 for iq in inequalities):
|
| 271 |
+
return latex1.strip() == latex2.strip()
|
| 272 |
+
|
| 273 |
+
# Security: Strict Whitelist for Equivalence Check
|
| 274 |
+
safe_pattern = r'^[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\=]+$'
|
| 275 |
+
def is_safe(s):
|
| 276 |
+
clean = s.replace('\\', '').replace('_', '').replace('{', '(').replace('}', ')')
|
| 277 |
+
return bool(re.match(safe_pattern, clean))
|
| 278 |
+
|
| 279 |
+
if not (is_safe(s1_raw) and is_safe(s2_raw)):
|
| 280 |
+
return latex1.strip() == latex2.strip()
|
| 281 |
+
|
| 282 |
+
expr1 = parse_expr(s1_raw, evaluate=False)
|
| 283 |
+
expr2 = parse_expr(s2_raw, evaluate=False)
|
| 284 |
|
| 285 |
+
# "Variable Trap": Basic structural equivalence if variables are involved
|
| 286 |
+
if len(expr1.free_symbols) > 0 or len(expr2.free_symbols) > 0:
|
| 287 |
+
return sympy.simplify(expr1 - expr2) == 0
|
| 288 |
+
|
| 289 |
+
# Numerical Identity check: simplify(LHS - RHS) == 0
|
| 290 |
diff = sympy.simplify(expr1 - expr2)
|
| 291 |
return diff == 0
|
| 292 |
except Exception as e:
|
| 293 |
logger.warning(f"[POLYGRAPH] Equivalence check failed: {e}")
|
| 294 |
+
return False
|
| 295 |
|
| 296 |
@staticmethod
|
| 297 |
async def verify_algebraic_consistency(steps: List[dict]) -> Tuple[bool, str]:
|
find_models.py
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
from google import genai
|
| 2 |
-
|
| 3 |
-
client = genai.Client(api_key="YOUR_GEMINI_API_KEY_HERE")
|
| 4 |
-
|
| 5 |
-
print("ืืืคืฉ ืืืืื Pro ืืืื ืื ืืืคืชื ืฉืื...")
|
| 6 |
-
for model in client.models.list():
|
| 7 |
-
if "pro" in model.name:
|
| 8 |
-
print(model.name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
orchestrator.py
CHANGED
|
@@ -17,6 +17,7 @@ from smart_solver import sign_step, resolve_ast_target, execute_action
|
|
| 17 |
import domain.telemetry as telemetry
|
| 18 |
from domain.schemas import BuddyEvent, BuddyState # V8.5: Streaming contract
|
| 19 |
from firebase_manager import firebase_manager
|
|
|
|
| 20 |
|
| 21 |
# V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems)
|
| 22 |
GLOBAL_TOKEN_LIMIT = 50000
|
|
@@ -385,11 +386,11 @@ class BuddyOrchestrator:
|
|
| 385 |
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
|
| 386 |
# V8.6.1: Force Strict JSON Output to prevent Markdown/Preamble leakage
|
| 387 |
self.model = genai.GenerativeModel(
|
| 388 |
-
model_name=
|
| 389 |
generation_config={"response_mime_type": "application/json"}
|
| 390 |
)
|
| 391 |
self.vision_model = genai.GenerativeModel(
|
| 392 |
-
model_name=
|
| 393 |
generation_config={"response_mime_type": "application/json"}
|
| 394 |
)
|
| 395 |
self.smart_solver = SmartSolver() # No model parameter needed
|
|
@@ -1177,11 +1178,18 @@ ctx.finish("$$ 4 $$", "ืืขืืื! ืืืขื ื ืืชืืฆืื.")
|
|
| 1177 |
)
|
| 1178 |
|
| 1179 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1180 |
# Step 2: Build check-me prompt and send to Vision LLM
|
| 1181 |
check_prompt = prompts.get_check_me_prompt(
|
| 1182 |
grade=grade,
|
| 1183 |
student_name=student_name,
|
| 1184 |
-
student_gender=student_gender
|
|
|
|
| 1185 |
)
|
| 1186 |
|
| 1187 |
print(f"๐ [CHECK-ME] Sending image ({len(image_data)} bytes) + check prompt to Vision LLM...")
|
|
@@ -1324,8 +1332,15 @@ ctx.finish("$$ 4 $$", "ืืขืืื! ืืืขื ื ืืชืืฆืื.")
|
|
| 1324 |
)
|
| 1325 |
|
| 1326 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 1327 |
-
# Step 8: COMPLETE with final answer
|
| 1328 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1329 |
if verdict == "correct":
|
| 1330 |
final_answer_text = f"โ
ืื ืืืืื! ืืคืชืจืื ื ืืื! {encouragement}"
|
| 1331 |
elif verdict == "unreadable":
|
|
@@ -1333,7 +1348,7 @@ ctx.finish("$$ 4 $$", "ืืขืืื! ืืืขื ื ืืชืืฆืื.")
|
|
| 1333 |
elif verdict == "methodology_error":
|
| 1334 |
final_answer_text = f"๐ ืืฉ ืืขืื ืืฉืืืช ืืคืชืจืื. {methodology_note}"
|
| 1335 |
else:
|
| 1336 |
-
final_answer_text = f"๐ ืืชืฉืืื ืื ืืื ื:
|
| 1337 |
|
| 1338 |
yield BuddyEvent(
|
| 1339 |
question_id=question_id,
|
|
@@ -1344,6 +1359,8 @@ ctx.finish("$$ 4 $$", "ืืขืืื! ืืืขื ื ืืชืืฆืื.")
|
|
| 1344 |
"is_correct": verdict == "correct",
|
| 1345 |
"score": score,
|
| 1346 |
"mistakes": mistakes,
|
|
|
|
|
|
|
| 1347 |
"problem_identified": problem_identified
|
| 1348 |
}
|
| 1349 |
)
|
|
@@ -1681,21 +1698,30 @@ ctx.finish("$$ 4 $$", "ืืขืืื! ืืืขื ื ืืชืืฆืื.")
|
|
| 1681 |
|
| 1682 |
# ืืขืงืฃ: ืื ืืฉืืืื ืืื ืจืง ืืขืืืช ืงืจืืื ืฉื ืกืืื ืื (ืื-ืฉืืืืื ืื/ืืืฆืื), ืกืืืืื ืขื ื-LLM ืืืืฆืืื
|
| 1683 |
if "SYMPY_PARSE_ERROR" in str(poly_reason):
|
| 1684 |
-
#
|
| 1685 |
-
#
|
| 1686 |
-
|
|
|
|
| 1687 |
import json
|
| 1688 |
-
response_text = json.dumps(llm_resp, ensure_ascii=False)
|
| 1689 |
-
|
| 1690 |
-
|
|
|
|
|
|
|
|
|
|
| 1691 |
is_degraded = True
|
| 1692 |
degraded_reason = "polygraph_fail_forbidden_words"
|
| 1693 |
-
# Continue to
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1694 |
else:
|
| 1695 |
-
|
|
|
|
| 1696 |
is_degraded = True
|
| 1697 |
-
degraded_reason = "
|
| 1698 |
-
break
|
| 1699 |
elif attempts == max_attempts:
|
| 1700 |
print(f"โ ๏ธ [HOTFIX] Max attempts reached. Forcing LLM response despite Polygraph failure.")
|
| 1701 |
is_degraded = True
|
|
@@ -2049,8 +2075,6 @@ ctx.finish("$$ 4 $$", "ืืขืืื! ืืืขื ื ืืชืืฆืื.")
|
|
| 2049 |
|
| 2050 |
# ืืืื ืืืืื ืืคืืืช ืืืช ืื ืืืืช ืืืช, ืืกืคืจ, ืื ืกืืื ืืชืืื
|
| 2051 |
has_math_anchor = bool(re.search(r'[0-9xyzXYZ=+\-\(\)]', ocr_clean))
|
| 2052 |
-
from config import CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
|
| 2053 |
-
|
| 2054 |
# V5.7.5: Short Math Bypass (Happy Flow for simple equations)
|
| 2055 |
# Often simple equations like $2+2=?$ yield low OCR confidence but are valid.
|
| 2056 |
is_short_math = has_math_anchor and len(ocr_clean) < 15 and len(ocr_clean) > 2
|
|
|
|
| 17 |
import domain.telemetry as telemetry
|
| 18 |
from domain.schemas import BuddyEvent, BuddyState # V8.5: Streaming contract
|
| 19 |
from firebase_manager import firebase_manager
|
| 20 |
+
from config import IS_PRODUCTION, ENV, GEMINI_MODEL, CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
|
| 21 |
|
| 22 |
# V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems)
|
| 23 |
GLOBAL_TOKEN_LIMIT = 50000
|
|
|
|
| 386 |
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
|
| 387 |
# V8.6.1: Force Strict JSON Output to prevent Markdown/Preamble leakage
|
| 388 |
self.model = genai.GenerativeModel(
|
| 389 |
+
model_name=GEMINI_MODEL,
|
| 390 |
generation_config={"response_mime_type": "application/json"}
|
| 391 |
)
|
| 392 |
self.vision_model = genai.GenerativeModel(
|
| 393 |
+
model_name=GEMINI_MODEL,
|
| 394 |
generation_config={"response_mime_type": "application/json"}
|
| 395 |
)
|
| 396 |
self.smart_solver = SmartSolver() # No model parameter needed
|
|
|
|
| 1178 |
)
|
| 1179 |
|
| 1180 |
try:
|
| 1181 |
+
# V311.0: Data Slicing Guardrail
|
| 1182 |
+
# First, transcribe and extract the "Absolute Truth" of the problem
|
| 1183 |
+
print("๐ [CHECK-ME] Step 1.5: Extracting Problem Data (Data Slicing)...")
|
| 1184 |
+
problem_text = await self.transcribe_image(image_data)
|
| 1185 |
+
data_anchor = await self._extract_key_data(problem_text)
|
| 1186 |
+
|
| 1187 |
# Step 2: Build check-me prompt and send to Vision LLM
|
| 1188 |
check_prompt = prompts.get_check_me_prompt(
|
| 1189 |
grade=grade,
|
| 1190 |
student_name=student_name,
|
| 1191 |
+
student_gender=student_gender,
|
| 1192 |
+
data_anchor=data_anchor
|
| 1193 |
)
|
| 1194 |
|
| 1195 |
print(f"๐ [CHECK-ME] Sending image ({len(image_data)} bytes) + check prompt to Vision LLM...")
|
|
|
|
| 1332 |
)
|
| 1333 |
|
| 1334 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 1335 |
+
# Step 8: COMPLETE with final answer & Protocol Alignment
|
| 1336 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 1337 |
+
from pedagogical_builder import sanitize_math_text
|
| 1338 |
+
|
| 1339 |
+
# V311.0: LaTeX UI Safety
|
| 1340 |
+
safe_correct_answer = sanitize_math_text(correct_answer) if correct_answer else ""
|
| 1341 |
+
if safe_correct_answer and not safe_correct_answer.startswith("$$") and not safe_correct_answer.startswith("$"):
|
| 1342 |
+
safe_correct_answer = f"$${safe_correct_answer}$$"
|
| 1343 |
+
|
| 1344 |
if verdict == "correct":
|
| 1345 |
final_answer_text = f"โ
ืื ืืืืื! ืืคืชืจืื ื ืืื! {encouragement}"
|
| 1346 |
elif verdict == "unreadable":
|
|
|
|
| 1348 |
elif verdict == "methodology_error":
|
| 1349 |
final_answer_text = f"๐ ืืฉ ืืขืื ืืฉืืืช ืืคืชืจืื. {methodology_note}"
|
| 1350 |
else:
|
| 1351 |
+
final_answer_text = f"๐ ืืชืฉืืื ืื ืืื ื: {safe_correct_answer}" if safe_correct_answer else encouragement
|
| 1352 |
|
| 1353 |
yield BuddyEvent(
|
| 1354 |
question_id=question_id,
|
|
|
|
| 1359 |
"is_correct": verdict == "correct",
|
| 1360 |
"score": score,
|
| 1361 |
"mistakes": mistakes,
|
| 1362 |
+
"feedback": encouragement, # Protocol Alignment
|
| 1363 |
+
"correct_answer": safe_correct_answer, # Protocol Alignment
|
| 1364 |
"problem_identified": problem_identified
|
| 1365 |
}
|
| 1366 |
)
|
|
|
|
| 1698 |
|
| 1699 |
# ืืขืงืฃ: ืื ืืฉืืืื ืืื ืจืง ืืขืืืช ืงืจืืื ืฉื ืกืืื ืื (ืื-ืฉืืืืื ืื/ืืืฆืื), ืกืืืืื ืขื ื-LLM ืืืืฆืืื
|
| 1700 |
if "SYMPY_PARSE_ERROR" in str(poly_reason):
|
| 1701 |
+
# V280.0 + V310.0: Smart Retry & Soft Fail with JSON Security check
|
| 1702 |
+
# 1. Logic: Only allow bypass if it's NOT the first attempt OR it's a "Soft Fail" case.
|
| 1703 |
+
# 2. Pedagogical: "ืืื ืคืชืจืื" is allowed. "ืื ืืืชืื" remains removed.
|
| 1704 |
+
forbidden_words = ["ืกืชืืจื ืื ืชืื ืื", "ืื ืืืืื ื", "ืฉืืืื ืืืืฉืื ืฉืื", "ืื ื ืืืื ืกืชืืจื", "ืกืชืืจื"]
|
| 1705 |
import json
|
| 1706 |
+
response_text = json.dumps(llm_resp, ensure_ascii=False)
|
| 1707 |
+
|
| 1708 |
+
has_forbidden = any(word in response_text for word in forbidden_words)
|
| 1709 |
+
|
| 1710 |
+
if has_forbidden:
|
| 1711 |
+
print(f"๐ [ROBUSTNESS] Forbidden word detected in SYMPY_PARSE_ERROR response. Not Trusting LLM.")
|
| 1712 |
is_degraded = True
|
| 1713 |
degraded_reason = "polygraph_fail_forbidden_words"
|
| 1714 |
+
# Continue to next attempt
|
| 1715 |
+
elif attempts < max_attempts:
|
| 1716 |
+
# V280.0: While attempts remain (attempts < max_attempts), retry to get better LaTeX
|
| 1717 |
+
print(f"๐ [ROBUSTNESS] SymPy Parse Error on attempt {attempts}. Triggering retry for better LaTeX.")
|
| 1718 |
+
# We don't break here, so it continues the loop
|
| 1719 |
else:
|
| 1720 |
+
# V280.0 Soft Fail: After retry (or max attempts), if No Forbidden Words, we TRUST the LLM.
|
| 1721 |
+
print(f"๐ก๏ธ [SOFT FAIL] Persistent Parse Error but no forbidden words. Trusting LLM output for sub-q {sub_q['id']}.")
|
| 1722 |
is_degraded = True
|
| 1723 |
+
degraded_reason = "sympy_soft_fail"
|
| 1724 |
+
break # Exit the attempt loop
|
| 1725 |
elif attempts == max_attempts:
|
| 1726 |
print(f"โ ๏ธ [HOTFIX] Max attempts reached. Forcing LLM response despite Polygraph failure.")
|
| 1727 |
is_degraded = True
|
|
|
|
| 2075 |
|
| 2076 |
# ืืืื ืืืืื ืืคืืืช ืืืช ืื ืืืืช ืืืช, ืืกืคืจ, ืื ืกืืื ืืชืืื
|
| 2077 |
has_math_anchor = bool(re.search(r'[0-9xyzXYZ=+\-\(\)]', ocr_clean))
|
|
|
|
|
|
|
| 2078 |
# V5.7.5: Short Math Bypass (Happy Flow for simple equations)
|
| 2079 |
# Often simple equations like $2+2=?$ yield low OCR confidence but are valid.
|
| 2080 |
is_short_math = has_math_anchor and len(ocr_clean) < 15 and len(ocr_clean) > 2
|
prompts.py
CHANGED
|
@@ -681,11 +681,10 @@ def get_master_prompt_v430():
|
|
| 681 |
|
| 682 |
# ==================== V285.0: CHECK ME PROMPT (HOMEWORK VERIFICATION) ====================
|
| 683 |
|
| 684 |
-
def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"):
|
| 685 |
"""
|
| 686 |
-
V285.
|
| 687 |
The LLM acts as a homework checker, NOT a solver.
|
| 688 |
-
It receives the student's image and analyzes their work step-by-step.
|
| 689 |
"""
|
| 690 |
# Gender-aware phrases
|
| 691 |
if student_gender == "F":
|
|
@@ -705,6 +704,18 @@ def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"
|
|
| 705 |
g_great = "ืืขืืื"
|
| 706 |
g_dear = f"{student_name} ืืงืจ"
|
| 707 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
return f"""
|
| 709 |
๐ ืชืคืงืื: ืืชื ืืืืงืช ืฉืืขืืจื ืืืช โ ืืืจื ืคืจืืืช ืืื ืฉืืืืงืช ืืช ืืขืืืื ืฉื ืชืืืื.
|
| 710 |
๐ซ ืืชื ืื ืคืืชืจ ืืช ืืชืจืืื ืืืืฉ! ืืชื ืื ืชื ืืช ืื ืฉืืชืืืื ืืชื.
|
|
@@ -712,6 +723,8 @@ def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"
|
|
| 712 |
๐ค ืืชืืืื: {student_name}, ืืืชื {grade}.
|
| 713 |
๐ ืืืืจ: {"ื ืงืื" if student_gender == "F" else "ืืืจ"}. ืืฉืชืืฉ/ื ืืืฉืื ืืชืืืื.
|
| 714 |
|
|
|
|
|
|
|
| 715 |
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 716 |
๐ ืฉืืืฉ ืฉืืื ืืืืืงื (ืืืื ืืืฆืข ืืคื ืืกืืจ):
|
| 717 |
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 681 |
|
| 682 |
# ==================== V285.0: CHECK ME PROMPT (HOMEWORK VERIFICATION) ====================
|
| 683 |
|
| 684 |
+
def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M", data_anchor: dict = None):
|
| 685 |
"""
|
| 686 |
+
V285.1: Dedicated prompt for the "Check Me" feature with DATA ANCHOR.
|
| 687 |
The LLM acts as a homework checker, NOT a solver.
|
|
|
|
| 688 |
"""
|
| 689 |
# Gender-aware phrases
|
| 690 |
if student_gender == "F":
|
|
|
|
| 704 |
g_great = "ืืขืืื"
|
| 705 |
g_dear = f"{student_name} ืืงืจ"
|
| 706 |
|
| 707 |
+
anchor_block = ""
|
| 708 |
+
if data_anchor:
|
| 709 |
+
anchor_block = f"""
|
| 710 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 711 |
+
๐ DATA INTEGRITY RULE (ABSOLUTE TRUTH):
|
| 712 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 713 |
+
ืื ืชืื ืื ืืืื ืื ื ืชืื ื ืืฉืืื ืืืงืืจืืื ืืคื ืฉืืืื ืืฉืื ืื ืืชืื ืืืืงืื.
|
| 714 |
+
ืขืืื ืืืืืง ืืช ืคืชืจืื ืืชืืืื ืื ืืื ืื ืชืื ืื ืืืื ืืืืืง!
|
| 715 |
+
{json.dumps(data_anchor, indent=2, ensure_ascii=False)}
|
| 716 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 717 |
+
"""
|
| 718 |
+
|
| 719 |
return f"""
|
| 720 |
๐ ืชืคืงืื: ืืชื ืืืืงืช ืฉืืขืืจื ืืืช โ ืืืจื ืคืจืืืช ืืื ืฉืืืืงืช ืืช ืืขืืืื ืฉื ืชืืืื.
|
| 721 |
๐ซ ืืชื ืื ืคืืชืจ ืืช ืืชืจืืื ืืืืฉ! ืืชื ืื ืชื ืืช ืื ืฉืืชืืืื ืืชื.
|
|
|
|
| 723 |
๐ค ืืชืืืื: {student_name}, ืืืชื {grade}.
|
| 724 |
๐ ืืืืจ: {"ื ืงืื" if student_gender == "F" else "ืืืจ"}. ืืฉืชืืฉ/ื ืืืฉืื ืืชืืืื.
|
| 725 |
|
| 726 |
+
{anchor_block}
|
| 727 |
+
|
| 728 |
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 729 |
๐ ืฉืืืฉ ืฉืืื ืืืืืงื (ืืืื ืืืฆืข ืืคื ืืกืืจ):
|
| 730 |
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
test_veo.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
from google import genai
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
# --- 1. ืืืืง ืืื ืืช ืืืคืชื ืืืจืื ืฉืืืฆืจืช ื-AI Studio ---
|
| 5 |
-
API_KEY = "AIzaSyDBw4Ddf2Fk4bSfe4aCFybAH74Cr-O-Quc"
|
| 6 |
-
|
| 7 |
-
def check_veo_access():
|
| 8 |
-
print("๐ ืืชืืื ืืืืงืช ืงืืฉืืจืืืช ืืื Gemini API...")
|
| 9 |
-
|
| 10 |
-
try:
|
| 11 |
-
# ืืชืืื ืืงืืืื ื
|
| 12 |
-
client = genai.Client(api_key=API_KEY)
|
| 13 |
-
|
| 14 |
-
# ืืืืงื 1: ืืื ืืืคืชื ืืืื ืขืืื?
|
| 15 |
-
print("๐ก ืืืืง ืืจืฉืืืช ืืคืชื ืืกืืกืืืช...")
|
| 16 |
-
models = client.models.list()
|
| 17 |
-
print("โ
ืืืคืชื ืชืงืื ืืืืืืจ ืืฉืจืชื ืืืื.")
|
| 18 |
-
|
| 19 |
-
# ืืืืงื 2: ืืื ืืืืื Veo ืคืชืื ืขืืืจื?
|
| 20 |
-
print("๐ฌ ืืืืง ืืืื ืืช ืกืคืฆืืคืืช ืืืืื Veo 3.1...")
|
| 21 |
-
veo_info = client.models.get(model="veo-3.1-generate-preview")
|
| 22 |
-
|
| 23 |
-
print("\n" + "="*40)
|
| 24 |
-
print(f"๐ ืืฉืืจื ืืชืืชืื ื: ืืื ืืืื!")
|
| 25 |
-
print(f"ืืืื {veo_info.name} ืืืื ืขืืืจื.")
|
| 26 |
-
print("ืืชื ืืืื ืืืจืืฅ ืืช ื-video_generator.py ืืืืืฆืจ ืืช ืืกืจืืื!")
|
| 27 |
-
print("="*40)
|
| 28 |
-
|
| 29 |
-
except Exception as e:
|
| 30 |
-
print("\n" + "!"*40)
|
| 31 |
-
print(f"โ ืืืืืงื ื ืืฉืื.")
|
| 32 |
-
|
| 33 |
-
error_msg = str(e).lower()
|
| 34 |
-
if "403" in error_msg or "permission" in error_msg:
|
| 35 |
-
print("\n๐ก ืืืื ื: ืืืคืชื ืชืงืื, ืืื ืืื ืื ืืจืฉืื ื-Veo.")
|
| 36 |
-
print("ืื ืงืืจื ืืืจื ืืื ืื ืืืฉืืื ืฉืื ืื ืืืืืจ ื-Paid Tier (ืขื ืืจืืืก ืืฉืจืื ืืขืืืื).")
|
| 37 |
-
print("ื-AI Studio, ืืืืื ืืืจืฉ ืืฉืืื ืขื ืืืฆืขื ืชืฉืืื (Pay-as-you-go).")
|
| 38 |
-
elif "401" in error_msg or "key" in error_msg:
|
| 39 |
-
print("\n๐ก ืืืื ื: ืืืคืชื ืื ืชืงืื. ืืื ืฉืืขืชืงืช ืืช ืื ืืืืจืืืช ื ืืื.")
|
| 40 |
-
else:
|
| 41 |
-
print(f"\nืฉืืืื ืืื ืืช: {e}")
|
| 42 |
-
print("!"*40)
|
| 43 |
-
|
| 44 |
-
if __name__ == "__main__":
|
| 45 |
-
check_veo_access()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_validation_robustness.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
import asyncio
|
| 3 |
+
from domain.math_validator import MathPolygraph, _latex_to_sympy_str
|
| 4 |
+
import sympy
|
| 5 |
+
|
| 6 |
+
@pytest.mark.asyncio
|
| 7 |
+
async def test_blind_stripping_and_regex():
|
| 8 |
+
# Test that mixed text is preserved and math is extracted
|
| 9 |
+
text = "ื ืฆืื $x=5$ ืื ืงืื $y=2$. ืืชืืฆืื ืืื $$z=7$$."
|
| 10 |
+
# _validate_single should return True because all math segments are valid
|
| 11 |
+
ok, reason = await MathPolygraph._validate_single(text, 1)
|
| 12 |
+
assert ok, f"Failed mixed text: {reason}"
|
| 13 |
+
|
| 14 |
+
@pytest.mark.asyncio
|
| 15 |
+
async def test_multiline_display_math():
|
| 16 |
+
# Test re.DOTALL with multi-line display math
|
| 17 |
+
text = """ืื ื ืืฉืืืื:
|
| 18 |
+
$$
|
| 19 |
+
x = 5 + 3
|
| 20 |
+
y = 10
|
| 21 |
+
$$
|
| 22 |
+
ืกืืฃ."""
|
| 23 |
+
ok, reason = await MathPolygraph._validate_single(text, 1)
|
| 24 |
+
assert ok, f"Failed multiline display math: {reason}"
|
| 25 |
+
|
| 26 |
+
@pytest.mark.asyncio
|
| 27 |
+
async def test_multi_equal_guard():
|
| 28 |
+
# x=y=5 should not crash unpacking
|
| 29 |
+
text = "ื ืชืื $x = y = 5$."
|
| 30 |
+
ok, reason = await MathPolygraph._validate_single(text, 1)
|
| 31 |
+
assert ok, f"Failed multi-equal check: {reason}"
|
| 32 |
+
|
| 33 |
+
@pytest.mark.asyncio
|
| 34 |
+
async def test_empty_string_guard():
|
| 35 |
+
# $$$$ should be ignored
|
| 36 |
+
text = "ืจืืง $$$$ ืืื $ $."
|
| 37 |
+
ok, reason = await MathPolygraph._validate_single(text, 1)
|
| 38 |
+
assert ok, f"Failed empty string guard: {reason}"
|
| 39 |
+
|
| 40 |
+
@pytest.mark.asyncio
|
| 41 |
+
async def test_arithmetic_exception_handling():
|
| 42 |
+
# $1/0$ should not crash the server
|
| 43 |
+
text = "ืืืืงื ืืืคืก $1/0$."
|
| 44 |
+
ok, reason = await MathPolygraph._validate_single(text, 1)
|
| 45 |
+
assert not ok
|
| 46 |
+
assert "SYMPY_PARSE_ERROR" in reason
|
| 47 |
+
|
| 48 |
+
@pytest.mark.asyncio
|
| 49 |
+
async def test_variable_trap_in_equivalence():
|
| 50 |
+
# Algebraic equivalence should pass syntax check but skip numerical identity
|
| 51 |
+
# x=5 is validated segment by segment (LHS: x, RHS: 5)
|
| 52 |
+
# are_equivalent for x=5 and x=5 should return True
|
| 53 |
+
res = MathPolygraph.are_equivalent("x=5", "x=5")
|
| 54 |
+
assert res is True
|
| 55 |
+
|
| 56 |
+
@pytest.mark.asyncio
|
| 57 |
+
async def test_numerical_identity_check():
|
| 58 |
+
# Valid identity
|
| 59 |
+
assert MathPolygraph.are_equivalent("2+3", "5") is True
|
| 60 |
+
# Hallucination
|
| 61 |
+
assert MathPolygraph.are_equivalent("2+3", "6") is False
|
| 62 |
+
|
| 63 |
+
@pytest.mark.asyncio
|
| 64 |
+
async def test_inequality_guard():
|
| 65 |
+
# x > 0 should not crash simplify(LHS - RHS)
|
| 66 |
+
# It should skip the identity check and return True because the two strings are identical
|
| 67 |
+
assert MathPolygraph.are_equivalent("x > 0", "x > 0") is True
|
| 68 |
+
|
| 69 |
+
@pytest.mark.asyncio
|
| 70 |
+
async def test_rce_protection():
|
| 71 |
+
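# Malicious string that could execute if handed unsanitized to an eval()-based parser such as sympify.
|
| 72 |
+
# Note: parse_expr(evaluate=False) only disables automatic simplification; it still calls eval() internally, so it is not a sandbox by itself.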
|
| 73 |
+
# We just want to ensure it doesn't crash or execute.
|
| 74 |
+
# Note: testing "exec" in a string is hard without side effects,
|
| 75 |
+
# but we can verify it doesn't crash on standard malicious patterns.
|
| 76 |
+
text = "__import__('os').system('echo hello')"
|
| 77 |
+
# This should definitely fail parsing or at least not execute
|
| 78 |
+
ok, reason = await MathPolygraph._check_segment(text, 1)
|
| 79 |
+
assert not ok
|
| 80 |
+
assert "SYMPY_PARSE_ERROR" in reason
|
| 81 |
+
|
| 82 |
+
if __name__ == "__main__":
|
| 83 |
+
import pytest
|
| 84 |
+
pytest.main([__file__])
|
video_generator.py
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
import time
|
| 2 |
-
import os
|
| 3 |
-
from google import genai
|
| 4 |
-
from google.genai import types
|
| 5 |
-
|
| 6 |
-
MODEL = "veo-3.1-generate-preview"
|
| 7 |
-
|
| 8 |
-
# Very important — set the API key in the environment
|
| 9 |
-
|
| 10 |
-
API_KEY = "AIzaSyDAM6BLLVWZDJsq9p-NdckwKQIi8EfCeHo"
|
| 11 |
-
|
| 12 |
-
client = genai.Client(
|
| 13 |
-
api_key=API_KEY,
|
| 14 |
-
http_options={"api_version": "v1beta"}
|
| 15 |
-
)
|
| 16 |
-
|
| 17 |
-
def generate_first_shot():
|
| 18 |
-
|
| 19 |
-
print("๐ฌ ืืชืืื ืืฆืืจืช ืืฉืื ืืจืืฉืื")
|
| 20 |
-
|
| 21 |
-
# Upload reference image
|
| 22 |
-
print("โ๏ธ ืืขืื ืชืืื ืช reference...")
|
| 23 |
-
|
| 24 |
-
uploaded_file = client.files.upload(
|
| 25 |
-
file="boy_master.jpg"
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
-
print("โ
ืืืขืื:", uploaded_file.uri)
|
| 29 |
-
|
| 30 |
-
# Cinematic prompt
|
| 31 |
-
prompt = """
|
| 32 |
-
Teenage boy sitting at wooden desk feeling frustrated while solving math homework.
|
| 33 |
-
He sighs slightly and looks at notebook.
|
| 34 |
-
|
| 35 |
-
Cinematic lighting, shallow depth of field.
|
| 36 |
-
Slow natural camera push in.
|
| 37 |
-
Subtle handheld motion.
|
| 38 |
-
High fidelity character consistency.
|
| 39 |
-
"""
|
| 40 |
-
|
| 41 |
-
# โญ ืืื ืื ืืกืื โ ืฆืจืื VideoGenerationSource
|
| 42 |
-
source = types.VideoGenerationSource(
|
| 43 |
-
prompt=prompt,
|
| 44 |
-
reference_images=[uploaded_file]
|
| 45 |
-
)
|
| 46 |
-
|
| 47 |
-
config = types.GenerateVideosConfig(
|
| 48 |
-
person_generation="dont_allow",
|
| 49 |
-
aspect_ratio="16:9",
|
| 50 |
-
number_of_videos=1,
|
| 51 |
-
duration_seconds=8,
|
| 52 |
-
resolution="720p"
|
| 53 |
-
)
|
| 54 |
-
|
| 55 |
-
print("๐ ืฉืืื ืืงืฉื ื-Veo 3.1 Standard...")
|
| 56 |
-
|
| 57 |
-
operation = client.models.generate_videos(
|
| 58 |
-
model=MODEL,
|
| 59 |
-
source=source,
|
| 60 |
-
config=config
|
| 61 |
-
)
|
| 62 |
-
|
| 63 |
-
# Polling
|
| 64 |
-
while not operation.done:
|
| 65 |
-
print("โณ ืืืื ืืจืื ืืืจ...")
|
| 66 |
-
time.sleep(10)
|
| 67 |
-
operation = client.operations.get(operation.name)
|
| 68 |
-
|
| 69 |
-
result = operation.result
|
| 70 |
-
|
| 71 |
-
if not result:
|
| 72 |
-
print("โ ืื ื ืืฆืจ ืืืืื")
|
| 73 |
-
return
|
| 74 |
-
|
| 75 |
-
for i, video in enumerate(result.generated_videos):
|
| 76 |
-
|
| 77 |
-
print("โจ ืืืืื ื ืืฆืจ:", video.video.uri)
|
| 78 |
-
|
| 79 |
-
client.files.download(file=video.video)
|
| 80 |
-
|
| 81 |
-
video.video.save(f"shot_01.mp4")
|
| 82 |
-
|
| 83 |
-
print("๐พ ื ืฉืืจ โ shot_01.mp4")
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
if __name__ == "__main__":
|
| 87 |
-
generate_first_shot()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|