V285.5: Multi-image ordering implementation and pedagogical grading logic
Browse files
- domain/math_validator.py +26 -3
- firebase_manager.py +14 -15
- main.py +4 -0
- orchestrator.py +189 -44
- prompts.py +110 -18
- strategy_manager.py +2 -2
- visuals.py +45 -22
domain/math_validator.py
CHANGED
|
@@ -28,7 +28,9 @@ def _latex_to_sympy_str(latex_str: str) -> str:
|
|
| 28 |
Best-effort LaTeX → SymPy-parseable string.
|
| 29 |
V310.0: Aggressive Hebrew stripping and malformed notation cleanup.
|
| 30 |
"""
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# 0. V310.0: Strip Hebrew characters and BOM/Zero-width chars immediately
|
| 34 |
s = re.sub(r'[\u0590-\u05FF\u200B-\u200D\uFEFF]', ' ', s)
|
|
@@ -96,6 +98,11 @@ class MathPolygraph:
|
|
| 96 |
2. parse_expr(evaluate=False): Prevent RCE and immediate evaluation.
|
| 97 |
"""
|
| 98 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
# RCE Prevention: Extreme character whitelist before parsing
|
| 100 |
# V280.0 FIX: Added ! for factorials and ensured strict match.
|
| 101 |
safe_pattern = r'^[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\%\=]+$'
|
|
@@ -229,9 +236,13 @@ class MathPolygraph:
|
|
| 229 |
return True, ""
|
| 230 |
|
| 231 |
@staticmethod
|
| 232 |
-
async def validate_step_sequence(steps: List[dict]) -> Tuple[bool, str]:
|
| 233 |
if not steps:
|
| 234 |
return True, ""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
for step in steps:
|
| 236 |
step_id = step.get('step_id', step.get('step_number', '?'))
|
| 237 |
math_fields = []
|
|
@@ -241,6 +252,14 @@ class MathPolygraph:
|
|
| 241 |
math_fields.append(val.strip())
|
| 242 |
if not math_fields:
|
| 243 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
ok, reason = await MathPolygraph._validate_single(math_fields[0], step_id)
|
| 245 |
if not ok:
|
| 246 |
return False, reason
|
|
@@ -294,11 +313,15 @@ class MathPolygraph:
|
|
| 294 |
return False
|
| 295 |
|
| 296 |
@staticmethod
|
| 297 |
-
async def verify_algebraic_consistency(steps: List[dict]) -> Tuple[bool, str]:
|
| 298 |
"""
|
| 299 |
V1.3: Checks if a sequence of steps is algebraically consistent.
|
| 300 |
Currently checks if subsequent steps are equivalent (for simplifications).
|
| 301 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
math_steps = []
|
| 303 |
for step in steps:
|
| 304 |
math = step.get('math_latex') or step.get('block_math') or step.get('math')
|
|
|
|
| 28 |
Best-effort LaTeX → SymPy-parseable string.
|
| 29 |
V310.0: Aggressive Hebrew stripping and malformed notation cleanup.
|
| 30 |
"""
|
| 31 |
+
if latex_str is None:
|
| 32 |
+
return ""
|
| 33 |
+
s = str(latex_str).strip()
|
| 34 |
|
| 35 |
# 0. V310.0: Strip Hebrew characters and BOM/Zero-width chars immediately
|
| 36 |
s = re.sub(r'[\u0590-\u05FF\u200B-\u200D\uFEFF]', ' ', s)
|
|
|
|
| 98 |
2. parse_expr(evaluate=False): Prevent RCE and immediate evaluation.
|
| 99 |
"""
|
| 100 |
try:
|
| 101 |
+
# V317.8: Suppress SymPy Deprecation Warnings (e.g. non-Expr in Pow)
|
| 102 |
+
import warnings
|
| 103 |
+
from sympy.utilities.exceptions import SymPyDeprecationWarning
|
| 104 |
+
warnings.filterwarnings("ignore", category=SymPyDeprecationWarning)
|
| 105 |
+
|
| 106 |
# RCE Prevention: Extreme character whitelist before parsing
|
| 107 |
# V280.0 FIX: Added ! for factorials and ensured strict match.
|
| 108 |
safe_pattern = r'^[a-zA-Z0-9\s\+\-\*\/\^\(\)\.\,\!\%\=]+$'
|
|
|
|
| 236 |
return True, ""
|
| 237 |
|
| 238 |
@staticmethod
|
| 239 |
+
async def validate_step_sequence(steps: List[dict], topic: str = "GENERAL") -> Tuple[bool, str]:
|
| 240 |
if not steps:
|
| 241 |
return True, ""
|
| 242 |
+
|
| 243 |
+
# V8.9.4: Skip deep SymPy parsing for discrete sequence steps to avoid false-positive SyntaxErrors
|
| 244 |
+
is_sequence = topic and "SEQUENCE" in topic.upper()
|
| 245 |
+
|
| 246 |
for step in steps:
|
| 247 |
step_id = step.get('step_id', step.get('step_number', '?'))
|
| 248 |
math_fields = []
|
|
|
|
| 252 |
math_fields.append(val.strip())
|
| 253 |
if not math_fields:
|
| 254 |
continue
|
| 255 |
+
|
| 256 |
+
# If sequence, we only check if it's "valid-ish" LaTeX vs deep SymPy check
|
| 257 |
+
if is_sequence:
|
| 258 |
+
# Basic sanity check for LaTeX balance
|
| 259 |
+
if math_fields[0].count('{') != math_fields[0].count('}'):
|
| 260 |
+
return False, f"LATEX_BRACKET_MISMATCH:step_{step_id}"
|
| 261 |
+
continue
|
| 262 |
+
|
| 263 |
ok, reason = await MathPolygraph._validate_single(math_fields[0], step_id)
|
| 264 |
if not ok:
|
| 265 |
return False, reason
|
|
|
|
| 313 |
return False
|
| 314 |
|
| 315 |
@staticmethod
|
| 316 |
+
async def verify_algebraic_consistency(steps: List[dict], topic: str = "GENERAL") -> Tuple[bool, str]:
|
| 317 |
"""
|
| 318 |
V1.3: Checks if a sequence of steps is algebraically consistent.
|
| 319 |
Currently checks if subsequent steps are equivalent (for simplifications).
|
| 320 |
"""
|
| 321 |
+
# V8.9.4: Skip deep SymPy parsing for discrete sequence steps
|
| 322 |
+
if topic and "SEQUENCE" in topic.upper():
|
| 323 |
+
return True, ""
|
| 324 |
+
|
| 325 |
math_steps = []
|
| 326 |
for step in steps:
|
| 327 |
math = step.get('math_latex') or step.get('block_math') or step.get('math')
|
firebase_manager.py
CHANGED
|
@@ -30,42 +30,41 @@ class FirebaseManager:
|
|
| 30 |
return
|
| 31 |
|
| 32 |
try:
|
| 33 |
-
from config import
|
| 34 |
import json
|
| 35 |
|
| 36 |
logger.info("🛠️ [FIREBASE] Starting initialization...")
|
| 37 |
|
| 38 |
-
# --- DEBUG: Print all relevant env var names ---
|
| 39 |
-
fb_vars = [v for v in os.environ.keys() if "FIREBASE" in v or "GOOGLE" in v]
|
| 40 |
-
logger.info(f"🔍 [FIREBASE] Detected environment variables: {fb_vars}")
|
| 41 |
-
|
| 42 |
cred_dict = None
|
| 43 |
|
| 44 |
-
# Try loading from environment
|
| 45 |
-
|
| 46 |
-
|
|
|
|
| 47 |
try:
|
| 48 |
-
cred_dict = json.loads(
|
| 49 |
-
logger.info("✅ [FIREBASE] Successfully parsed credentials from
|
| 50 |
except Exception as e:
|
| 51 |
-
logger.error(f"❌ [FIREBASE] Failed to parse
|
| 52 |
|
| 53 |
-
# Fallback to file
|
| 54 |
if not cred_dict:
|
|
|
|
| 55 |
if os.path.exists(FIREBASE_CREDENTIALS_PATH):
|
| 56 |
with open(FIREBASE_CREDENTIALS_PATH, "r", encoding="utf-8") as f:
|
| 57 |
cred_dict = json.load(f)
|
| 58 |
logger.info(f"📂 [FIREBASE] Loading credentials from file: {FIREBASE_CREDENTIALS_PATH}.")
|
| 59 |
else:
|
| 60 |
-
logger.warning(f"⚠️ [FIREBASE]
|
| 61 |
-
# We don't return here, we try to see if it was already initialized by something else
|
| 62 |
|
| 63 |
if cred_dict:
|
| 64 |
cred = credentials.Certificate(cred_dict)
|
| 65 |
firebase_admin.initialize_app(cred, {
|
| 66 |
'storageBucket': STORAGE_BUCKET
|
| 67 |
})
|
| 68 |
-
logger.info(f"🚀 [FIREBASE] SDK Initialized for {'PROD' if IS_PRODUCTION else 'DEV'}.")
|
|
|
|
|
|
|
| 69 |
|
| 70 |
self._bucket = storage.bucket()
|
| 71 |
self._db = firestore.client()
|
|
|
|
| 30 |
return
|
| 31 |
|
| 32 |
try:
|
| 33 |
+
from config import STORAGE_BUCKET, IS_PRODUCTION
|
| 34 |
import json
|
| 35 |
|
| 36 |
logger.info("🛠️ [FIREBASE] Starting initialization...")
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
cred_dict = None
|
| 39 |
|
| 40 |
+
# Mission 1: Try loading from environment variables (checking multiple names for safety)
|
| 41 |
+
creds_str = os.environ.get("FIREBASE_CREDENTIALS") or os.environ.get("FIREBASE_CREDENTIALS_JSON")
|
| 42 |
+
|
| 43 |
+
if creds_str and len(creds_str.strip()) > 10:
|
| 44 |
try:
|
| 45 |
+
cred_dict = json.loads(creds_str)
|
| 46 |
+
logger.info("✅ [FIREBASE] Successfully parsed credentials from Environment Secrets!")
|
| 47 |
except Exception as e:
|
| 48 |
+
logger.error(f"❌ [FIREBASE] Failed to parse Environment Credentials: {e}")
|
| 49 |
|
| 50 |
+
# Fallback to local file only for local development (if no secret is set)
|
| 51 |
if not cred_dict:
|
| 52 |
+
from config import FIREBASE_CREDENTIALS_PATH
|
| 53 |
if os.path.exists(FIREBASE_CREDENTIALS_PATH):
|
| 54 |
with open(FIREBASE_CREDENTIALS_PATH, "r", encoding="utf-8") as f:
|
| 55 |
cred_dict = json.load(f)
|
| 56 |
logger.info(f"📂 [FIREBASE] Loading credentials from file: {FIREBASE_CREDENTIALS_PATH}.")
|
| 57 |
else:
|
| 58 |
+
logger.warning(f"⚠️ [FIREBASE] Credentials not found in environment OR local file at {FIREBASE_CREDENTIALS_PATH}.")
|
|
|
|
| 59 |
|
| 60 |
if cred_dict:
|
| 61 |
cred = credentials.Certificate(cred_dict)
|
| 62 |
firebase_admin.initialize_app(cred, {
|
| 63 |
'storageBucket': STORAGE_BUCKET
|
| 64 |
})
|
| 65 |
+
logger.info(f"🚀 [FIREBASE] SDK Initialized successfully for {'PROD' if IS_PRODUCTION else 'DEV'}.")
|
| 66 |
+
else:
|
| 67 |
+
logger.error("❌ [FIREBASE] CRITICAL ERROR: Firebase credentials not found! Firebase is OFFLINE.")
|
| 68 |
|
| 69 |
self._bucket = storage.bucket()
|
| 70 |
self._db = firestore.client()
|
main.py
CHANGED
|
@@ -387,6 +387,7 @@ async def solve_stream(
|
|
| 387 |
מקבל קובץ ישירות מהפלאטר ומפענח אותו עם OpenCV.
|
| 388 |
"""
|
| 389 |
final_student_name = student_name or user or "תלמיד"
|
|
|
|
| 390 |
print(f"🚀 🟢 BIT-LOG: Received Multipart request from {final_student_name}. Grade: {grade}")
|
| 391 |
|
| 392 |
# Quota Check
|
|
@@ -535,6 +536,9 @@ async def solve_stream_v2(
|
|
| 535 |
# Only increment usage if OCR/Solving process starts successfully
|
| 536 |
|
| 537 |
try:
|
|
|
|
|
|
|
|
|
|
| 538 |
# 1. קריאת הבינארי
|
| 539 |
image_bytes_list = []
|
| 540 |
for single_file in files:
|
|
|
|
| 387 |
מקבל קובץ ישירות מהפלאטר ומפענח אותו עם OpenCV.
|
| 388 |
"""
|
| 389 |
final_student_name = student_name or user or "תלמיד"
|
| 390 |
+
uid = None
|
| 391 |
print(f"🚀 🟢 BIT-LOG: Received Multipart request from {final_student_name}. Grade: {grade}")
|
| 392 |
|
| 393 |
# Quota Check
|
|
|
|
| 536 |
# Only increment usage if OCR/Solving process starts successfully
|
| 537 |
|
| 538 |
try:
|
| 539 |
+
# V316.5: Sort incoming files by filename to ensure image_00, image_01... order
|
| 540 |
+
files.sort(key=lambda x: x.filename)
|
| 541 |
+
|
| 542 |
# 1. קריאת הבינארי
|
| 543 |
image_bytes_list = []
|
| 544 |
for single_file in files:
|
orchestrator.py
CHANGED
|
@@ -19,8 +19,8 @@ from domain.schemas import BuddyEvent, BuddyState # V8.5: Streaming contract
|
|
| 19 |
from firebase_manager import firebase_manager
|
| 20 |
from config import IS_PRODUCTION, ENV, GEMINI_MODEL, CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
|
| 21 |
|
| 22 |
-
# V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems)
|
| 23 |
-
GLOBAL_TOKEN_LIMIT =
|
| 24 |
GLOBAL_TIMEOUT_SEC = 300
|
| 25 |
|
| 26 |
# ==================== V7.2: TICKET 1 — AST ENRICHMENT HELPERS ====================
|
|
@@ -173,8 +173,64 @@ def validate_and_sanitize_response(resp_json, category="GENERAL"):
|
|
| 173 |
step["explanation_text"] = "הסבר לא זמין עקב חריגה מהחוזה הפדגוגי."
|
| 174 |
|
| 175 |
resp_json["logic_error"] = resp_json.get("logic_error", False) or has_error
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
return resp_json
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
import asyncio
|
| 179 |
|
| 180 |
async def safe_llm_call(generator_func, timeout_seconds=45.0):
|
|
@@ -772,12 +828,16 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 772 |
"""V231.14: Phase 1 - Extract specific values with validation and image support."""
|
| 773 |
for attempt in range(1, 3): # 2 attempts
|
| 774 |
try:
|
|
|
|
| 775 |
prompt = prompts.get_data_extraction_prompt(problem_text)
|
|
|
|
|
|
|
| 776 |
|
| 777 |
-
# Build multimodal request if image is available
|
| 778 |
content = [prompt]
|
| 779 |
-
if image_data:
|
|
|
|
| 780 |
content.append({"mime_type": "image/png", "data": image_data})
|
|
|
|
| 781 |
|
| 782 |
res = await asyncio.wait_for(
|
| 783 |
self.model.generate_content_async(content),
|
|
@@ -788,8 +848,13 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 788 |
if match:
|
| 789 |
data = safe_json_loads(match.group())
|
| 790 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 791 |
# V261.X: Guard against parse-failure sentinel being treated as valid data
|
| 792 |
-
if data and data.get('logic_error') and data.get('error_type') == 'PARSING_FAILURE':
|
| 793 |
print(f"⚠️ [BIT-LOG] Data Anchor JSON parse failed (Attempt {attempt}/2) — skipping sentinel.")
|
| 794 |
continue
|
| 795 |
|
|
@@ -831,6 +896,8 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 831 |
print(f"⚠️ [BIT-LOG] Data Anchor timeout (Attempt {attempt}/2)")
|
| 832 |
except Exception as e:
|
| 833 |
print(f"⚠️ [BIT-LOG] Data Anchor error (Attempt {attempt}/2): {e}")
|
|
|
|
|
|
|
| 834 |
|
| 835 |
|
| 836 |
print("🚨 [BIT-LOG] CRITICAL: Data Anchor extraction failed completely!")
|
|
@@ -1166,7 +1233,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1166 |
|
| 1167 |
# ===================== V285.0: CHECK ME (HOMEWORK VERIFICATION) =====================
|
| 1168 |
|
| 1169 |
-
async def _check_student_work(self,
|
| 1170 |
student_gender: str = "M", question_id: str = "q_check"):
|
| 1171 |
"""
|
| 1172 |
V285.0: Dedicated pipeline for the "Check Me" feature.
|
|
@@ -1185,8 +1252,9 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1185 |
|
| 1186 |
try:
|
| 1187 |
# V311.0: Data Slicing Guardrail
|
| 1188 |
-
# First, transcribe and extract the "Absolute Truth" of the problem
|
| 1189 |
-
|
|
|
|
| 1190 |
problem_text = await self.transcribe_image(image_data)
|
| 1191 |
data_anchor = await self._extract_key_data(problem_text, image_data=image_data)
|
| 1192 |
|
|
@@ -1198,13 +1266,15 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1198 |
data_anchor=data_anchor
|
| 1199 |
)
|
| 1200 |
|
| 1201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1202 |
|
| 1203 |
response = await asyncio.wait_for(
|
| 1204 |
-
self.vision_model.generate_content_async(
|
| 1205 |
-
check_prompt,
|
| 1206 |
-
{"mime_type": "image/png", "data": image_data}
|
| 1207 |
-
]),
|
| 1208 |
timeout=60.0
|
| 1209 |
)
|
| 1210 |
|
|
@@ -1442,7 +1512,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1442 |
yield BuddyEvent(
|
| 1443 |
question_id=question_id,
|
| 1444 |
state=BuddyState.STRATEGY_READY,
|
| 1445 |
-
payload={"sections": [strategy_card]} if strategy_card else
|
| 1446 |
)
|
| 1447 |
|
| 1448 |
# V300.3: Smart Visual Triggers (Product Alignment)
|
|
@@ -1679,14 +1749,14 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1679 |
llm_steps = llm_resp if isinstance(llm_resp, list) else llm_resp.get("steps", [])
|
| 1680 |
|
| 1681 |
# 2. השרת שולט: הפעלת ה-Polygraph על הצעדים של ה-LLM
|
| 1682 |
-
struct_ok, struct_reason = await MathPolygraph.validate_step_sequence(llm_steps)
|
| 1683 |
|
| 1684 |
poly_ok = struct_ok
|
| 1685 |
poly_reason = struct_reason
|
| 1686 |
|
| 1687 |
if struct_ok:
|
| 1688 |
# V1.3: Also verify algebraic consistency (e.g. A + B = C)
|
| 1689 |
-
alg_ok, alg_reason = await MathPolygraph.verify_algebraic_consistency(llm_steps)
|
| 1690 |
if not alg_ok:
|
| 1691 |
poly_ok = False
|
| 1692 |
poly_reason = alg_reason
|
|
@@ -1718,13 +1788,9 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1718 |
is_degraded = True
|
| 1719 |
degraded_reason = "polygraph_fail_forbidden_words"
|
| 1720 |
# Continue to next attempt
|
| 1721 |
-
elif attempts < max_attempts:
|
| 1722 |
-
# V280.0: If it's the first attempt, we MUST retry once to get better LaTeX
|
| 1723 |
-
print(f"🔄 [ROBUSTNESS] SymPy Parse Error on attempt {attempts}. Triggering retry for better LaTeX.")
|
| 1724 |
-
# We don't break here, so it continues the loop
|
| 1725 |
else:
|
| 1726 |
-
#
|
| 1727 |
-
print(f"🛡️ [SOFT FAIL]
|
| 1728 |
is_degraded = True
|
| 1729 |
degraded_reason = "sympy_soft_fail"
|
| 1730 |
break # Exit the attempt loop
|
|
@@ -1763,18 +1829,17 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1763 |
}
|
| 1764 |
|
| 1765 |
# 4. Packaging & Yielding
|
| 1766 |
-
# V8.6.7 FIX: Only pass the final answer text forward to prevent massive JSON injection in future prompts
|
| 1767 |
-
|
| 1768 |
-
context[f"result_{sub_q['id']}"] = ans_text
|
| 1769 |
|
| 1770 |
# AI Assessment Telemetry Extraction
|
| 1771 |
if not assessment_sent and isinstance(solved_data, dict) and "assessment" in solved_data:
|
| 1772 |
assessment_data = solved_data["assessment"]
|
| 1773 |
if uid and assessment_data:
|
| 1774 |
try:
|
| 1775 |
-
import asyncio
|
| 1776 |
from analytics import analytics_manager
|
| 1777 |
-
asyncio.
|
|
|
|
| 1778 |
print(f"📊 [ANALYTICS] Triggered background telemetry for {uid}")
|
| 1779 |
assessment_sent = True
|
| 1780 |
except Exception as e:
|
|
@@ -1907,13 +1972,14 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 1907 |
|
| 1908 |
# V5.10.0: Save to History if Premium
|
| 1909 |
tier = kwargs.get('tier', 'student_basic')
|
| 1910 |
-
|
| 1911 |
print(f"🔍 [DEBUG HISTORY] UID: {uid}, Received Tier: '{tier}', kwargs keys: {list(kwargs.keys())}")
|
| 1912 |
is_premium = tier in ["premium", "admin", "admin_unlimited"]
|
| 1913 |
if is_premium and uid:
|
| 1914 |
try:
|
| 1915 |
-
#
|
| 1916 |
-
asyncio.
|
|
|
|
| 1917 |
print(f"📚 [HISTORY] History saving scheduled for {uid}")
|
| 1918 |
except Exception as e:
|
| 1919 |
print(f"❌ [HISTORY] Failed to schedule history saving: {e}")
|
|
@@ -2028,15 +2094,19 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2028 |
"""
|
| 2029 |
V277.0: Main solve method with BINARY DATA SUPPORT.
|
| 2030 |
"""
|
|
|
|
| 2031 |
image_data_list = kwargs.get('image_data_list')
|
| 2032 |
image_data = kwargs.get('image_data') or kwargs.get('image_bytes')
|
| 2033 |
|
| 2034 |
-
# V308.0: Dual support logic
|
| 2035 |
if image_data and not image_data_list:
|
| 2036 |
image_data_list = [image_data]
|
| 2037 |
elif image_data_list and not image_data:
|
| 2038 |
image_data = image_data_list[0]
|
| 2039 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2040 |
question_id = kwargs.get('question_id', f"q_{int(time.time())}")
|
| 2041 |
start_time = asyncio.get_event_loop().time()
|
| 2042 |
# GLOBAL_TIMEOUT_SEC = 240 # 4 minutes usually
|
|
@@ -2055,7 +2125,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2055 |
print(f"📝 [V285.0] Mode=CHECK detected. Routing to _check_student_work()...")
|
| 2056 |
student_gender = kwargs.get('student_gender', 'M')
|
| 2057 |
async for event in self._check_student_work(
|
| 2058 |
-
|
| 2059 |
grade=grade,
|
| 2060 |
student_name=student_name,
|
| 2061 |
student_gender=student_gender,
|
|
@@ -2065,9 +2135,17 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2065 |
return
|
| 2066 |
# ===================== END CHECK ME ROUTING =====================
|
| 2067 |
|
| 2068 |
-
if
|
| 2069 |
-
print(f"🔵 [BIT-LOG] Starting OCR Pipeline on
|
| 2070 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2071 |
|
| 2072 |
logger.info(f"🔎 [TRACE] RAW OCR TEXT: {problem_text}")
|
| 2073 |
|
|
@@ -2127,7 +2205,7 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2127 |
fast_result, _, _ = validate_and_fix_solution(fast_result)
|
| 2128 |
# Quick Polygraph check
|
| 2129 |
_poly_steps = collect_all_steps(fast_result)
|
| 2130 |
-
_poly_ok, _ = MathPolygraph.validate_step_sequence(_poly_steps)
|
| 2131 |
|
| 2132 |
if _poly_ok:
|
| 2133 |
yield BuddyEvent(
|
|
@@ -2142,8 +2220,19 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2142 |
# ===================== FULL STREAMING PIPELINE =====================
|
| 2143 |
print(f"🎯 [BIT-LOG] Using Streaming Pipeline Strategy: {strategy.value}")
|
| 2144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2145 |
data_anchor = await self._extract_key_data(problem_text, image_data=image_data) or {}
|
| 2146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2147 |
# Iterate through the streaming smart_solve
|
| 2148 |
# V5.10.2: Remove keys already passed explicitly to avoid TypeError collision
|
| 2149 |
for _key in ['student_gender', 'image_data', 'image_data_list', 'image_bytes',
|
|
@@ -2520,11 +2609,66 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2520 |
s = re.sub(r'\s+', ' ', s)
|
| 2521 |
return s.strip()
|
| 2522 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2523 |
async def _save_exercise_history(self, uid: str, question: str, solutions: list):
|
| 2524 |
-
"""
|
| 2525 |
try:
|
| 2526 |
db = firebase_manager.get_db()
|
| 2527 |
-
if not db: return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2528 |
|
| 2529 |
# Flatten solution steps into a single string
|
| 2530 |
solution_text_parts = []
|
|
@@ -2535,13 +2679,13 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2535 |
title = section.get("section_title", "")
|
| 2536 |
solution_text_parts.append(f"### {title}")
|
| 2537 |
for step in section.get("steps", []):
|
| 2538 |
-
exp = step.get("explanation_text", "")
|
| 2539 |
-
math = step.get("math_artifact", {}).get("latex", "")
|
| 2540 |
-
if not math: math = step.get("block_math", "")
|
| 2541 |
|
| 2542 |
-
solution_text_parts.append(exp)
|
| 2543 |
if math:
|
| 2544 |
-
math = self._deep_sanitize_math(math)
|
| 2545 |
solution_text_parts.append(f"$${math}$$")
|
| 2546 |
solution_text_parts.append("---")
|
| 2547 |
|
|
@@ -2549,10 +2693,11 @@ ctx.finish("$$ 4 $$", "מעולה! הגענו לתוצאה.")
|
|
| 2549 |
|
| 2550 |
from firebase_admin import firestore
|
| 2551 |
import datetime
|
| 2552 |
-
# Save to history collection
|
| 2553 |
history_ref = db.collection('users').document(uid).collection('history').document()
|
| 2554 |
history_ref.set({
|
| 2555 |
"original_question_text": question,
|
|
|
|
| 2556 |
"solution_steps_text": full_solution,
|
| 2557 |
"timestamp": firestore.SERVER_TIMESTAMP
|
| 2558 |
})
|
|
|
|
| 19 |
from firebase_manager import firebase_manager
|
| 20 |
from config import IS_PRODUCTION, ENV, GEMINI_MODEL, CONFIDENCE_THRESHOLD_HIGH, CONFIDENCE_THRESHOLD_MEDIUM
|
| 21 |
|
| 22 |
+
# V8.6.9: Global Guardrails (Increased for High-Complexity 5-Unit Problems - V317.8)
|
| 23 |
+
GLOBAL_TOKEN_LIMIT = 100000
|
| 24 |
GLOBAL_TIMEOUT_SEC = 300
|
| 25 |
|
| 26 |
# ==================== V7.2: TICKET 1 — AST ENRICHMENT HELPERS ====================
|
|
|
|
| 173 |
step["explanation_text"] = "הסבר לא זמין עקב חריגה מהחוזה הפדגוגי."
|
| 174 |
|
| 175 |
resp_json["logic_error"] = resp_json.get("logic_error", False) or has_error
|
| 176 |
+
|
| 177 |
+
# V317.5: UI Sanitization Layer
|
| 178 |
+
if not resp_json.get("logic_error"):
|
| 179 |
+
resp_json = sanitize_llm_output(resp_json)
|
| 180 |
+
|
| 181 |
return resp_json
|
| 182 |
|
| 183 |
+
def unify_data_anchor(raw_data):
    """V317.5: Smart Data Anchor Unification (prevents key overwrite).

    Normalises the parsed data-anchor payload into a single dict.

    Args:
        raw_data: The decoded JSON payload. May be a dict, a list of dicts,
            or anything else.

    Returns:
        * ``raw_data`` itself (same object) when it is already a dict.
        * A new dict merging every dict element of a list. When the same key
          appears with different values, the values are collected, in first
          seen order and without duplicates, into a list under that key.
          Non-dict list elements are ignored.
        * An empty dict for any other input type.
    """
    if isinstance(raw_data, dict):
        return raw_data

    if not isinstance(raw_data, list):
        return {}  # Fallback

    unified = {}
    # Keys whose value in `unified` is a merge list created by this function.
    # Tracking them explicitly (instead of testing isinstance(..., list))
    # keeps values that are natively lists from being confused with the
    # accumulator, which previously produced mixed nesting such as
    # [1, 2, [1, 2]] and mutated the caller's own list objects.
    merged_keys = set()

    for item in raw_data:
        if not isinstance(item, dict):
            continue
        for key, value in item.items():
            if key not in unified:
                unified[key] = value
            elif key in merged_keys:
                # Key already holds several distinct values: add unseen ones.
                if value not in unified[key]:
                    unified[key].append(value)
            elif unified[key] != value:
                # Second distinct value for this key: start a merge list.
                unified[key] = [unified[key], value]
                merged_keys.add(key)

    return unified
|
| 206 |
+
|
| 207 |
+
def sanitize_llm_output(json_response):
    """V317.5: Cleans technical errors (SYMPY_PARSE_ERROR) and Hebrew from LaTeX.

    Mutates and returns ``json_response``:

    * For each step whose ``block_math`` contains ``SYMPY_PARSE_ERROR``, the
      math block is cleared and a notice is appended to ``content_mixed``.
    * For each step whose ``block_math`` contains Hebrew letters, the text
      (with ``\\text{``, ``}`` and ``$`` removed) is moved into
      ``content_mixed`` inside square brackets and the math block is cleared.
    * A ``final_answer`` containing ``SYMPY_PARSE_ERROR`` is replaced with a
      generic message.

    Args:
        json_response: Parsed LLM response. Non-dict values are returned
            unchanged.

    Returns:
        The same object that was passed in.
    """
    if not isinstance(json_response, dict):
        return json_response

    steps = json_response.get("steps")
    # The payload comes from an LLM, so tolerate a missing/null "steps" value
    # and malformed entries instead of raising while sanitizing.
    if isinstance(steps, (list, tuple)):
        for step in steps:
            if not isinstance(step, dict):
                continue

            block_math = step.get("block_math", "")
            if not block_math or not isinstance(block_math, str):
                continue

            # "content_mixed" may be present but null; treat that as empty.
            existing_text = step.get("content_mixed") or ""

            # Mission 2: detect SymPy error markers leaked into the math block.
            if "SYMPY_PARSE_ERROR" in block_math:
                step["block_math"] = ""
                step["content_mixed"] = existing_text + "\n(המשוואה הוסתרה עקב קושי בתצוגה)."

            # Mission 2: detect Hebrew letters inside the LaTeX.
            elif re.search(r'[א-ת]', block_math):
                # Move the content into the text field and drop the math block.
                clean_math = block_math.replace('\\text{', '').replace('}', '').replace('$', '')
                step["content_mixed"] = existing_text + f"\n[{clean_math}]"
                step["block_math"] = ""

    # V280.0: Also check final_answer
    if "final_answer" in json_response and "SYMPY_PARSE_ERROR" in str(json_response["final_answer"]):
        json_response["final_answer"] = "התקבלה תשובה מורכבת (ראה שלבים מלאים)."

    return json_response
|
| 233 |
+
|
| 234 |
import asyncio
|
| 235 |
|
| 236 |
async def safe_llm_call(generator_func, timeout_seconds=45.0):
|
|
|
|
| 828 |
"""V231.14: Phase 1 - Extract specific values with validation and image support."""
|
| 829 |
for attempt in range(1, 3): # 2 attempts
|
| 830 |
try:
|
| 831 |
+
print(f"⚓ [BIT-LOG] Data Anchor Extraction (Attempt {attempt}). Image Data: {type(image_data)} {len(image_data) if image_data else 'None'}")
|
| 832 |
prompt = prompts.get_data_extraction_prompt(problem_text)
|
| 833 |
+
if not prompt:
|
| 834 |
+
prompt = f"Extract math data from this problem: {problem_text}"
|
| 835 |
|
|
|
|
| 836 |
content = [prompt]
|
| 837 |
+
if image_data and isinstance(image_data, bytes):
|
| 838 |
+
# V316.9: Use canonical dict format for maximum SDK compatibility
|
| 839 |
content.append({"mime_type": "image/png", "data": image_data})
|
| 840 |
+
print(f"📸 [BIT-LOG] Appended image part (size: {len(image_data)})")
|
| 841 |
|
| 842 |
res = await asyncio.wait_for(
|
| 843 |
self.model.generate_content_async(content),
|
|
|
|
| 848 |
if match:
|
| 849 |
data = safe_json_loads(match.group())
|
| 850 |
|
| 851 |
+
# V317.5: Robust JSON Handling - Smart Unification
|
| 852 |
+
data = unify_data_anchor(data)
|
| 853 |
+
if isinstance(data, dict):
|
| 854 |
+
print(f"⚓ [BIT-LOG] Unified Data Anchor: {json.dumps(data, ensure_ascii=False)[:100]}...")
|
| 855 |
+
|
| 856 |
# V261.X: Guard against parse-failure sentinel being treated as valid data
|
| 857 |
+
if data and isinstance(data, dict) and data.get('logic_error') and data.get('error_type') == 'PARSING_FAILURE':
|
| 858 |
print(f"⚠️ [BIT-LOG] Data Anchor JSON parse failed (Attempt {attempt}/2) — skipping sentinel.")
|
| 859 |
continue
|
| 860 |
|
|
|
|
| 896 |
print(f"⚠️ [BIT-LOG] Data Anchor timeout (Attempt {attempt}/2)")
|
| 897 |
except Exception as e:
|
| 898 |
print(f"⚠️ [BIT-LOG] Data Anchor error (Attempt {attempt}/2): {e}")
|
| 899 |
+
import traceback
|
| 900 |
+
traceback.print_exc()
|
| 901 |
|
| 902 |
|
| 903 |
print("🚨 [BIT-LOG] CRITICAL: Data Anchor extraction failed completely!")
|
|
|
|
| 1233 |
|
| 1234 |
# ===================== V285.0: CHECK ME (HOMEWORK VERIFICATION) =====================
|
| 1235 |
|
| 1236 |
+
async def _check_student_work(self, image_data_list: List[bytes], grade: str, student_name: str,
|
| 1237 |
student_gender: str = "M", question_id: str = "q_check"):
|
| 1238 |
"""
|
| 1239 |
V285.0: Dedicated pipeline for the "Check Me" feature.
|
|
|
|
| 1252 |
|
| 1253 |
try:
|
| 1254 |
# V311.0: Data Slicing Guardrail
|
| 1255 |
+
# First, transcribe and extract the "Absolute Truth" of the problem from the FIRST image
|
| 1256 |
+
image_data = image_data_list[0]
|
| 1257 |
+
print("📝 [CHECK-ME] Step 1.5: Extracting Problem Data (Data Slicing from image_00)...")
|
| 1258 |
problem_text = await self.transcribe_image(image_data)
|
| 1259 |
data_anchor = await self._extract_key_data(problem_text, image_data=image_data)
|
| 1260 |
|
|
|
|
| 1266 |
data_anchor=data_anchor
|
| 1267 |
)
|
| 1268 |
|
| 1269 |
+
# Prepare images for Gemini Vision
|
| 1270 |
+
vision_content = [check_prompt]
|
| 1271 |
+
for img_bytes in image_data_list:
|
| 1272 |
+
vision_content.append({"mime_type": "image/png", "data": img_bytes})
|
| 1273 |
+
|
| 1274 |
+
print(f"📝 [CHECK-ME] Sending {len(image_data_list)} images + check prompt to Vision LLM...")
|
| 1275 |
|
| 1276 |
response = await asyncio.wait_for(
|
| 1277 |
+
self.vision_model.generate_content_async(vision_content),
|
|
|
|
|
|
|
|
|
|
| 1278 |
timeout=60.0
|
| 1279 |
)
|
| 1280 |
|
|
|
|
| 1512 |
yield BuddyEvent(
|
| 1513 |
question_id=question_id,
|
| 1514 |
state=BuddyState.STRATEGY_READY,
|
| 1515 |
+
payload={"sections": [strategy_card]} if strategy_card else {}
|
| 1516 |
)
|
| 1517 |
|
| 1518 |
# V300.3: Smart Visual Triggers (Product Alignment)
|
|
|
|
| 1749 |
llm_steps = llm_resp if isinstance(llm_resp, list) else llm_resp.get("steps", [])
|
| 1750 |
|
| 1751 |
# 2. השרת שולט: הפעלת ה-Polygraph על הצעדים של ה-LLM
|
| 1752 |
+
struct_ok, struct_reason = await MathPolygraph.validate_step_sequence(llm_steps, topic=sub_q.get('topic', 'GENERAL'))
|
| 1753 |
|
| 1754 |
poly_ok = struct_ok
|
| 1755 |
poly_reason = struct_reason
|
| 1756 |
|
| 1757 |
if struct_ok:
|
| 1758 |
# V1.3: Also verify algebraic consistency (e.g. A + B = C)
|
| 1759 |
+
alg_ok, alg_reason = await MathPolygraph.verify_algebraic_consistency(llm_steps, topic=sub_q.get('topic', 'GENERAL'))
|
| 1760 |
if not alg_ok:
|
| 1761 |
poly_ok = False
|
| 1762 |
poly_reason = alg_reason
|
|
|
|
| 1788 |
is_degraded = True
|
| 1789 |
degraded_reason = "polygraph_fail_forbidden_words"
|
| 1790 |
# Continue to next attempt
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1791 |
else:
|
| 1792 |
+
# V317.8 Soft Fail: Treat SymPy Parse Error as a warning immediately to avoid retries on valid LaTeX
|
| 1793 |
+
print(f"🛡️ [SOFT FAIL] SymPy Parse Error detected (Attempt {attempts}). No forbidden words found. Trusting LLM output for sub-q {sub_q['id']}.")
|
| 1794 |
is_degraded = True
|
| 1795 |
degraded_reason = "sympy_soft_fail"
|
| 1796 |
break # Exit the attempt loop
|
|
|
|
| 1829 |
}
|
| 1830 |
|
| 1831 |
# 4. Packaging & Yielding
|
| 1832 |
+
# V8.6.7 FIX / V317.5: Only pass the final answer text forward to prevent massive JSON injection in future prompts
|
| 1833 |
+
context[f"result_{sub_q['id']}"] = solved_data.get("final_answer", "No valid answer extracted") if isinstance(solved_data, dict) else "הושלם"
|
|
|
|
| 1834 |
|
| 1835 |
# AI Assessment Telemetry Extraction
|
| 1836 |
if not assessment_sent and isinstance(solved_data, dict) and "assessment" in solved_data:
|
| 1837 |
assessment_data = solved_data["assessment"]
|
| 1838 |
if uid and assessment_data:
|
| 1839 |
try:
|
|
|
|
| 1840 |
from analytics import analytics_manager
|
| 1841 |
+
loop = asyncio.get_event_loop()
|
| 1842 |
+
loop.create_task(asyncio.to_thread(analytics_manager.update_weekly_analytics, uid, assessment_data))
|
| 1843 |
print(f"📊 [ANALYTICS] Triggered background telemetry for {uid}")
|
| 1844 |
assessment_sent = True
|
| 1845 |
except Exception as e:
|
|
|
|
| 1972 |
|
| 1973 |
# V5.10.0: Save to History if Premium
|
| 1974 |
tier = kwargs.get('tier', 'student_basic')
|
| 1975 |
+
# Variable uid is already defined at start of smart_solve
|
| 1976 |
print(f"🔍 [DEBUG HISTORY] UID: {uid}, Received Tier: '{tier}', kwargs keys: {list(kwargs.keys())}")
|
| 1977 |
is_premium = tier in ["premium", "admin", "admin_unlimited"]
|
| 1978 |
if is_premium and uid:
|
| 1979 |
try:
|
| 1980 |
+
# V315.0: Explicit scheduling with loop check
|
| 1981 |
+
loop = asyncio.get_event_loop()
|
| 1982 |
+
loop.create_task(self._save_exercise_history(uid, problem_text, all_solutions))
|
| 1983 |
print(f"📚 [HISTORY] History saving scheduled for {uid}")
|
| 1984 |
except Exception as e:
|
| 1985 |
print(f"❌ [HISTORY] Failed to schedule history saving: {e}")
|
|
|
|
| 2094 |
"""
|
| 2095 |
V277.0: Main solve method with BINARY DATA SUPPORT.
|
| 2096 |
"""
|
| 2097 |
+
uid = kwargs.get('uid')
|
| 2098 |
image_data_list = kwargs.get('image_data_list')
|
| 2099 |
image_data = kwargs.get('image_data') or kwargs.get('image_bytes')
|
| 2100 |
|
|
|
|
| 2101 |
if image_data and not image_data_list:
|
| 2102 |
image_data_list = [image_data]
|
| 2103 |
elif image_data_list and not image_data:
|
| 2104 |
image_data = image_data_list[0]
|
| 2105 |
|
| 2106 |
+
# V316.0: CRITICAL - Ensure image_data is explicitly passed in kwargs for the rest of parameters
|
| 2107 |
+
kwargs['image_data'] = image_data
|
| 2108 |
+
kwargs['image_data_list'] = image_data_list
|
| 2109 |
+
|
| 2110 |
question_id = kwargs.get('question_id', f"q_{int(time.time())}")
|
| 2111 |
start_time = asyncio.get_event_loop().time()
|
| 2112 |
# GLOBAL_TIMEOUT_SEC = 240 # 4 minutes usually
|
|
|
|
| 2125 |
print(f"📝 [V285.0] Mode=CHECK detected. Routing to _check_student_work()...")
|
| 2126 |
student_gender = kwargs.get('student_gender', 'M')
|
| 2127 |
async for event in self._check_student_work(
|
| 2128 |
+
image_data_list=image_data_list,
|
| 2129 |
grade=grade,
|
| 2130 |
student_name=student_name,
|
| 2131 |
student_gender=student_gender,
|
|
|
|
| 2135 |
return
|
| 2136 |
# ===================== END CHECK ME ROUTING =====================
|
| 2137 |
|
| 2138 |
+
if image_data_list:
|
| 2139 |
+
print(f"🔵 [BIT-LOG] Starting OCR Pipeline on {len(image_data_list)} images...")
|
| 2140 |
+
ocr_results = []
|
| 2141 |
+
for i, img in enumerate(image_data_list):
|
| 2142 |
+
print(f"📸 [BIT-LOG] Transcribing image {i}...")
|
| 2143 |
+
text = await self.transcribe_image(img)
|
| 2144 |
+
if text:
|
| 2145 |
+
ocr_results.append(text)
|
| 2146 |
+
|
| 2147 |
+
problem_text = "\n\n".join(ocr_results)
|
| 2148 |
+
image_data = image_data_list[0] # Use first image for main processing logic/anchors
|
| 2149 |
|
| 2150 |
logger.info(f"🔎 [TRACE] RAW OCR TEXT: {problem_text}")
|
| 2151 |
|
|
|
|
| 2205 |
fast_result, _, _ = validate_and_fix_solution(fast_result)
|
| 2206 |
# Quick Polygraph check
|
| 2207 |
_poly_steps = collect_all_steps(fast_result)
|
| 2208 |
+
_poly_ok, _ = MathPolygraph.validate_step_sequence(_poly_steps, topic=str(strategy.value))
|
| 2209 |
|
| 2210 |
if _poly_ok:
|
| 2211 |
yield BuddyEvent(
|
|
|
|
| 2220 |
# ===================== FULL STREAMING PIPELINE =====================
|
| 2221 |
print(f"🎯 [BIT-LOG] Using Streaming Pipeline Strategy: {strategy.value}")
|
| 2222 |
|
| 2223 |
+
# V316.0: image_data is already hydrated at the top of solve_problem.
|
| 2224 |
+
# This block is now redundant but kept for safety if someone moves things.
|
| 2225 |
+
if image_data is None and image_data_list:
|
| 2226 |
+
image_data = image_data_list[0]
|
| 2227 |
+
print("📸 [BIT-LOG] Using first image from list for Data Anchor phase. (Redundant Check)")
|
| 2228 |
+
|
| 2229 |
data_anchor = await self._extract_key_data(problem_text, image_data=image_data) or {}
|
| 2230 |
|
| 2231 |
+
# V8.9.2: SEPARATE VALIDATOR PASS (Single Source of Truth)
|
| 2232 |
+
if image_data and data_anchor:
|
| 2233 |
+
print("🛡️ [V8.9.2] Starting Data Anchor Validation Pass...")
|
| 2234 |
+
data_anchor = await self._validate_anchor(data_anchor, image_data)
|
| 2235 |
+
|
| 2236 |
# Iterate through the streaming smart_solve
|
| 2237 |
# V5.10.2: Remove keys already passed explicitly to avoid TypeError collision
|
| 2238 |
for _key in ['student_gender', 'image_data', 'image_data_list', 'image_bytes',
|
|
|
|
| 2609 |
s = re.sub(r'\s+', ' ', s)
|
| 2610 |
return s.strip()
|
| 2611 |
|
| 2612 |
+
async def _validate_anchor(self, data_anchor: dict, image_data: bytes) -> dict:
|
| 2613 |
+
"""V8.9.2: Single Source of Truth Validator pass."""
|
| 2614 |
+
try:
|
| 2615 |
+
from prompts import get_anchor_validation_prompt
|
| 2616 |
+
from utils.safe_json import safe_extract_json
|
| 2617 |
+
|
| 2618 |
+
prompt = get_anchor_validation_prompt(data_anchor)
|
| 2619 |
+
|
| 2620 |
+
# Using current model which supports Vision
|
| 2621 |
+
response = await self.model.generate_content_async(
|
| 2622 |
+
[
|
| 2623 |
+
{"mime_type": "image/jpeg", "data": image_data},
|
| 2624 |
+
prompt
|
| 2625 |
+
]
|
| 2626 |
+
)
|
| 2627 |
+
|
| 2628 |
+
print(f"🛡️ [V8.9.3] Raw Validator Response: {response.text[:200]}...")
|
| 2629 |
+
clean_anchor = safe_extract_json(response.text, "anchor_validator")
|
| 2630 |
+
if clean_anchor:
|
| 2631 |
+
print(f"🛡️ ✅ [V8.9.2] Anchor Validated: {len(clean_anchor.get('function_equations', []))} equations found.")
|
| 2632 |
+
return clean_anchor
|
| 2633 |
+
|
| 2634 |
+
return data_anchor
|
| 2635 |
+
except Exception as e:
|
| 2636 |
+
print(f"⚠️ [V8.9.2] Anchor Validation failed: {e}. Falling back to raw OCR.")
|
| 2637 |
+
return data_anchor
|
| 2638 |
+
|
| 2639 |
async def _save_exercise_history(self, uid: str, question: str, solutions: list):
|
| 2640 |
+
"""V317.0: Saves sanitized exercise history with clean titles."""
|
| 2641 |
try:
|
| 2642 |
db = firebase_manager.get_db()
|
| 2643 |
+
if not db or not uid: return
|
| 2644 |
+
|
| 2645 |
+
def generate_clean_title(ocr_raw_text):
    """Build a short display title (at most six words) from raw OCR text.

    If the input is a string that starts with ``{`` and parses as a JSON
    object, the title is taken from its ``'text'`` field; brace-led text that
    is not JSON (e.g. set notation) is used as-is. Math blocks, LaTeX command
    names, brackets and leftover operator characters are stripped.

    Args:
        ocr_raw_text: OCR output; any non-string value is converted with
            ``str()``.

    Returns:
        The first six remaining words joined by spaces, followed by ``...``
        when more words were available, or the generic fallback title when
        nothing usable remains or the input is ``None``.
    """
    default_title = "תרגיל במתמטיקה"
    if ocr_raw_text is None:
        return default_title
    try:
        if isinstance(ocr_raw_text, str):
            text = ocr_raw_text
            if text.strip().startswith('{'):
                try:
                    payload = json.loads(text)
                except ValueError:
                    # Starts with a brace but is not JSON: keep the raw text.
                    payload = None
                if isinstance(payload, dict):
                    text = str(payload.get('text', '') or '')
        else:
            text = str(ocr_raw_text)

        # 1. Drop display math $$...$$ first (may span lines); otherwise the
        #    inline pattern below only eats the empty "$$" delimiters.
        text = re.sub(r'\$\$.*?\$\$', '', text, flags=re.DOTALL)
        # 2. Drop inline math $...$
        text = re.sub(r'\$.*?\$', '', text)
        # 3. Drop LaTeX command names (arguments are kept; braces go next)
        text = re.sub(r'\\[a-zA-Z]+', '', text)
        # 4. Drop curly and square brackets
        text = re.sub(r'[\{\}\[\]]', '', text)
        # 5. Drop leftover math symbols
        text = re.sub(r'[\^_*=+\-/|]', '', text)

        # Keep the first six words; split()/join() already normalises
        # newlines and surrounding whitespace.
        words = text.split()
        if not words:
            return default_title
        title = " ".join(words[:6])
        if len(words) > 6:
            title += "..."
        return title
    except Exception:
        return default_title
|
| 2672 |
|
| 2673 |
# Flatten solution steps into a single string
|
| 2674 |
solution_text_parts = []
|
|
|
|
| 2679 |
title = section.get("section_title", "")
|
| 2680 |
solution_text_parts.append(f"### {title}")
|
| 2681 |
for step in section.get("steps", []):
|
| 2682 |
+
exp = step.get("explanation_text", "") or ""
|
| 2683 |
+
math = step.get("math_artifact", {}).get("latex", "") or ""
|
| 2684 |
+
if not math: math = step.get("block_math", "") or ""
|
| 2685 |
|
| 2686 |
+
solution_text_parts.append(str(exp))
|
| 2687 |
if math:
|
| 2688 |
+
math = self._deep_sanitize_math(str(math))
|
| 2689 |
solution_text_parts.append(f"$${math}$$")
|
| 2690 |
solution_text_parts.append("---")
|
| 2691 |
|
|
|
|
| 2693 |
|
| 2694 |
from firebase_admin import firestore
|
| 2695 |
import datetime
|
| 2696 |
+
# Save to history collection with clean title
|
| 2697 |
history_ref = db.collection('users').document(uid).collection('history').document()
|
| 2698 |
history_ref.set({
|
| 2699 |
"original_question_text": question,
|
| 2700 |
+
"display_title": generate_clean_title(question),
|
| 2701 |
"solution_steps_text": full_solution,
|
| 2702 |
"timestamp": firestore.SERVER_TIMESTAMP
|
| 2703 |
})
|
prompts.py
CHANGED
|
@@ -230,15 +230,19 @@ def _detect_relevant_rules(text: str, category: str = "") -> list:
|
|
| 230 |
return rules
|
| 231 |
|
| 232 |
def get_data_extraction_prompt(problem_text: str) -> str:
|
| 233 |
-
"""V231.
|
| 234 |
-
|
| 235 |
-
### [
|
| 236 |
You are provided with an IMAGE and its rough OCR transcription.
|
| 237 |
-
The OCR text is
|
| 238 |
-
|
| 239 |
-
Use the OCR text only as a secondary reference for context.
|
| 240 |
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
{problem_text}
|
| 243 |
|
| 244 |
Extract:
|
|
@@ -248,6 +252,17 @@ Extract:
|
|
| 248 |
- Lines: y = mx + b, ax + by = c
|
| 249 |
- ANY equation with '=' sign!
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
2. **points**: Named points like A, B, M(3,5), P(x,y)
|
| 252 |
|
| 253 |
3. **specific_values**: Numbers like r=5, a=3, m=2
|
|
@@ -274,6 +289,15 @@ JSON format (STRUCTURE ONLY - DO NOT USE THESE EXACT VALUES):
|
|
| 274 |
"point_b": "(null, null)"
|
| 275 |
}}
|
| 276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
CRITICAL INSTRUCTIONS:
|
| 278 |
1. Include ALL equations with '=' sign in function_equations!
|
| 279 |
2. **DATA INTEGRITY (V4.3.0):** Use the data below verbatim.
|
|
@@ -293,7 +317,7 @@ def get_specialist_prompt(category, problem_text, solver_hint, grade, student_na
|
|
| 293 |
"""בניית הפרומפט המלכותי — המורה למתמטיקה V231.6 (Data Anchor)"""
|
| 294 |
features = _get_grade_features(grade, category)
|
| 295 |
relevant_rules = _detect_relevant_rules(problem_text, category)
|
| 296 |
-
rules_str = "\n".join([f" - {r}" for r in relevant_rules]) if relevant_rules else " (לא זוהו כללים ספציפיים — בחר רק כללים רלוונטיים לקטגוריה {category})"
|
| 297 |
|
| 298 |
# Anchor Block — DATA INTEGRITY RULE
|
| 299 |
anchor_block = ""
|
|
@@ -314,7 +338,6 @@ def get_specialist_prompt(category, problem_text, solver_hint, grade, student_na
|
|
| 314 |
CONSTRAINT: If the data says A(0,5), use A(0,5). If it contains f(x), solve that f(x).
|
| 315 |
"""
|
| 316 |
|
| 317 |
-
|
| 318 |
# V231.5: Gender-aware phrases
|
| 319 |
if student_gender == "F":
|
| 320 |
g = {
|
|
@@ -387,11 +410,34 @@ def get_specialist_prompt(category, problem_text, solver_hint, grade, student_na
|
|
| 387 |
"concave_down": ["(b, c)"]
|
| 388 |
}
|
| 389 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
|
| 391 |
{proof_block}
|
| 392 |
{investigation_block}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
"""
|
| 394 |
-
|
| 395 |
|
| 396 |
|
| 397 |
# prompts.py - V275.1 (Safe OCR - Technique over Examples)
|
|
@@ -550,14 +596,15 @@ def get_master_prompt_v860(category: str = "", problem_text: str = ""):
|
|
| 550 |
═══════════════════════════════════════════
|
| 551 |
ABSOLUTE RULES (violations cause immediate rejection):
|
| 552 |
═══════════════════════════════════════════
|
| 553 |
-
1. **DATA ANCHOR SUPREMACY:** The equations in the JSON Data Anchor are the ABSOLUTE TRUTH.
|
| 554 |
2. **ZERO MAGIC MATH (BABY STEPS):**
|
| 555 |
- NEVER skip algebraic steps. Show moving sides, dividing, and expanding brackets.
|
| 556 |
- ALWAYS explain the mathematical rule/theorem *BEFORE* applying it.
|
| 557 |
* Bad: "נגזור ונשווה לאפס: f'(x) = 2x"
|
| 558 |
* Good: "כדי למצוא נקודת קיצון נגזור את הפונקציה. מכיוון שזו מנה, נשתמש בכלל המנה האומר ש... נגזור את המונה בנפרד ואת המכנה בנפרד:"
|
| 559 |
3. **COMPLETE THE MISSION (ANTI-TRUNCATION):** Never abandon the task. You MUST reach the final numeric/algebraic answer. If asked for extrema, you must find x, y, and classify (max/min).
|
| 560 |
-
4. **
|
|
|
|
| 561 |
- `content_mixed`: ONLY for your warm Hebrew explanation and short inline variables (e.g. $x=5$). Do NOT put long equations, derivatives, or multi-line steps here!
|
| 562 |
- `block_math`: This is where the ACTUAL CALCULATION goes. It must contain the main equation or algebraic step in PURE LaTeX.
|
| 563 |
- IF there is any mathematical derivation or calculation in a step, it MUST go into `block_math` and NOT into `content_mixed`. NEVER put an equation on a new line inside `content_mixed`.
|
|
@@ -574,11 +621,16 @@ def get_master_prompt_v860(category: str = "", problem_text: str = ""):
|
|
| 574 |
- State factually in `content_mixed`: "אני מזהה סתירה בנתונים, בואו נבדוק שוב את הפונקציה המקורית. על פי החישוב שלי [הסבר קצר...]."
|
| 575 |
- Focus on the TRUTH of your calculation. Never force a derivation to match a (likely misread) OCR error.
|
| 576 |
- **Mathematical Logic (V8.6.6):** If OCR is ambiguous (e.g. $e^x$ vs $e^{-x}$), use the overall context of the question (e.g., domain, asymptotes, or known behavior) to determine the logically correct formula.
|
| 577 |
-
8. **
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
- In the `final_answer` field, NEVER use `\\` or `\newline` for line breaks.
|
| 579 |
- **BIDI SAFETY:** PROHIBIT Hebrew text (like "או", "וגם") inside `final_answer` or `block_math`.
|
| 580 |
- If there are multiple answers, separate them ONLY with English commas (e.g., "x=1, x=2").
|
| 581 |
-
|
| 582 |
- Always use standard mathematical variables ($x, y, z, m, n$) as provided in context. Never invent new variable names not found in the Data Anchor or original problem.
|
| 583 |
10. **STRATEGY CARD NO-SPOILER RULE (CRITICAL):**
|
| 584 |
- The `strategy_card` MUST NOT contain any numbers, final equations, derivatives, or solutions.
|
|
@@ -611,12 +663,13 @@ def get_master_prompt_v860(category: str = "", problem_text: str = ""):
|
|
| 611 |
- NEVER use data, parameters (like $a=1$), or specific values that explicitly belong to a LATER sub-question (e.g., Section ב') to solve an EARLIER sub-question (e.g., Section א').
|
| 612 |
- Solve earlier sections algebraically using general variables unless the data is part of the global question anchor.
|
| 613 |
- If a student's finding in Section א' is required for Section ב', you may use it, but NEVER the other way around.
|
| 614 |
-
17. **
|
|
|
|
| 615 |
|
| 616 |
|
| 617 |
-
═══════════════════════════════════════════
|
| 618 |
REQUIRED JSON STRUCTURE (EXACT KEYS):
|
| 619 |
-
═══════════════════════════════════════════
|
| 620 |
{{
|
| 621 |
{graph_field}
|
| 622 |
"strategy_card": {{
|
|
@@ -702,6 +755,10 @@ def get_check_me_prompt(grade: str, student_name: str, student_gender: str = "M"
|
|
| 702 |
🎓 תפקיד: אתה בודקת שיעורי בית — מורה פרטית חמה שבודקת את העבודה של תלמיד.
|
| 703 |
🚫 אתה לא פותר את התרגיל מחדש! אתה מנתח את מה שהתלמיד כתב.
|
| 704 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 705 |
👤 התלמיד: {student_name}, כיתה {grade}.
|
| 706 |
👑 מגדר: {"נקבה" if student_gender == "F" else "זכר"}. השתמש/י בלשון מתאימה.
|
| 707 |
|
|
@@ -827,6 +884,41 @@ def get_teacher_summary_prompt(student_name: str, student_gender: str = "M"):
|
|
| 827 |
"formulas_to_remember": ["LaTeX נוסחה 1", "LaTeX נוסחה 2"],
|
| 828 |
"tts_speech": "טקסט דיבור עברי נקי ל-TTS"
|
| 829 |
}}
|
|
|
|
| 830 |
|
| 831 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 832 |
"""
|
|
|
|
| 230 |
return rules
|
| 231 |
|
| 232 |
def get_data_extraction_prompt(problem_text: str) -> str:
|
| 233 |
+
"""V231.15: Template-based extraction to avoid f-string escape hell."""
|
| 234 |
+
template = r"""
|
| 235 |
+
### [SACRED TRUTH: IMAGE OVER OCR]
|
| 236 |
You are provided with an IMAGE and its rough OCR transcription.
|
| 237 |
+
⚠️ CRITICAL WARNING: The OCR text is a WEAK HINT and is often WRONG, truncated, or mangled.
|
| 238 |
+
💎 SACRED TRUTH: The IMAGE is the absolute source of truth.
|
|
|
|
| 239 |
|
| 240 |
+
YOUR TASK:
|
| 241 |
+
1. PERCEPTUAL PRIORITY: Visually inspect the IMAGE meticulously. Every pixel of the formula counts.
|
| 242 |
+
2. OCR SKEPTICISM: If the OCR text says \frac{{1}}{{(\ln x)^2}} but the IMAGE shows \frac{{1}}{{\ln^2 x}}, you MUST ignore the OCR and extract \frac{{1}}{{\ln^2 x}}.
|
| 243 |
+
3. NO INVENTIONS: Do not "fix" the math if it looks weird. Extract strictly what is drawn.
|
| 244 |
+
|
| 245 |
+
Problem Text (OCR Hint):
|
| 246 |
{problem_text}
|
| 247 |
|
| 248 |
Extract:
|
|
|
|
| 252 |
- Lines: y = mx + b, ax + by = c
|
| 253 |
- ANY equation with '=' sign!
|
| 254 |
|
| 255 |
+
**Mathematical Scanning Directionality (V8.9.3):**
|
| 256 |
+
- **LTR PRIORITY:** Mathematical equations are STRICTLY Left-To-Right.
|
| 257 |
+
- When extracting formulas embedded in Hebrew text, IGNORE the RTL flow.
|
| 258 |
+
- Read the characters in their literal horizontal visual order from LEFT to RIGHT.
|
| 259 |
+
- Ensure multipliers, exponents, and signs are placed exactly where they appear visually.
|
| 260 |
+
|
| 261 |
+
**Visual Context & Continuity (V8.9.5):**
|
| 262 |
+
- **SPLIT EQUATIONS:** If an equation starts on one line and ends on another, merge them into a single coherent formula.
|
| 263 |
+
- **SEGMENTED CONSTRAINTS:** Look for constraints (like x > 0) near equations; they are often visually separated by space or Hebrew words but belong to the formula.
|
| 264 |
+
- **MULTI-PART ANCHORING:** Maintain consistency between sub-questions (א, ב, ג). If a variable 'm' is defined in the preamble, it applies to all sub-questions.
|
| 265 |
+
|
| 266 |
2. **points**: Named points like A, B, M(3,5), P(x,y)
|
| 267 |
|
| 268 |
3. **specific_values**: Numbers like r=5, a=3, m=2
|
|
|
|
| 289 |
"point_b": "(null, null)"
|
| 290 |
}}
|
| 291 |
|
| 292 |
+
|
| 293 |
+
**CRITICAL JSON STRUCTURE RULE (V8.9.4):**
|
| 294 |
+
- You MUST output a SINGLE, flat JSON object.
|
| 295 |
+
- EVERY mathematical expression, function, or parameter MUST have a UNIQUE, highly descriptive key.
|
| 296 |
+
- DO NOT use a generic key like "equation" multiple times, as this will overwrite the data.
|
| 297 |
+
- DO NOT output an array/list of objects.
|
| 298 |
+
- **Correct Example:** { "main_function_f": "f(x)=...", "function_h": "h(x)=...", "given_extremum_x": "x=1/e" }
|
| 299 |
+
- **Wrong Example:** { "equation": "f(x)=...", "equation": "h(x)=..." }
|
| 300 |
+
|
| 301 |
CRITICAL INSTRUCTIONS:
|
| 302 |
1. Include ALL equations with '=' sign in function_equations!
|
| 303 |
2. **DATA INTEGRITY (V4.3.0):** Use the data below verbatim.
|
|
|
|
| 317 |
"""בניית הפרומפט המלכותי — המורה למתמטיקה V231.6 (Data Anchor)"""
|
| 318 |
features = _get_grade_features(grade, category)
|
| 319 |
relevant_rules = _detect_relevant_rules(problem_text, category)
|
| 320 |
+
rules_str = "\n".join([f" - {r}" for r in relevant_rules]) if relevant_rules else f" (לא זוהו כללים ספציפיים — בחר רק כללים רלוונטיים לקטגוריה {category})"
|
| 321 |
|
| 322 |
# Anchor Block — DATA INTEGRITY RULE
|
| 323 |
anchor_block = ""
|
|
|
|
| 338 |
CONSTRAINT: If the data says A(0,5), use A(0,5). If it contains f(x), solve that f(x).
|
| 339 |
"""
|
| 340 |
|
|
|
|
| 341 |
# V231.5: Gender-aware phrases
|
| 342 |
if student_gender == "F":
|
| 343 |
g = {
|
|
|
|
| 410 |
"concave_down": ["(b, c)"]
|
| 411 |
}
|
| 412 |
"""
|
| 413 |
+
prompt = f"""
|
| 414 |
+
DEPTH: {features['depth']}
|
| 415 |
+
STYLE: {features['style']} (בנימה של '{g['royal']}').
|
| 416 |
+
TONE: {features['tone']}
|
| 417 |
+
|
| 418 |
+
{anchor_block}
|
| 419 |
+
|
| 420 |
+
🎯 המשימה: פתור את התרגיל בצורה פדגוגית, מעצימה ובשלבים ברורים.
|
| 421 |
+
|
| 422 |
+
📜 כללי הפדגוגיה של BuddyMath (חובה!):
|
| 423 |
+
{rules_str}
|
| 424 |
|
| 425 |
{proof_block}
|
| 426 |
{investigation_block}
|
| 427 |
+
|
| 428 |
+
═══════════════════════════════════════════════════
|
| 429 |
+
הנחיות לפתרון (Solver Hint):
|
| 430 |
+
{solver_hint}
|
| 431 |
+
═══════════════════════════════════════════════════
|
| 432 |
+
|
| 433 |
+
השאלה לפתרון:
|
| 434 |
+
{problem_text}
|
| 435 |
+
|
| 436 |
+
דגימת סגנון פנייה:
|
| 437 |
+
- פתיחה: "{g['example_open']}"
|
| 438 |
+
- סיום: "{g['example_close']}"
|
| 439 |
"""
|
| 440 |
+
return prompt
|
| 441 |
|
| 442 |
|
| 443 |
# prompts.py - V275.1 (Safe OCR - Technique over Examples)
|
|
|
|
| 596 |
═══════════════════════════════════════════
|
| 597 |
ABSOLUTE RULES (violations cause immediate rejection):
|
| 598 |
═══════════════════════════════════════════
|
| 599 |
+
1. **DATA ANCHOR SUPREMACY (V8.9.2):** The equations in the JSON Data Anchor are the ABSOLUTE TRUTH. You MUST solve the exact math provided in the Anchor. Do NOT attempt to re-read the image for the main function; trust the pre-validated Data Anchor.
|
| 600 |
2. **ZERO MAGIC MATH (BABY STEPS):**
|
| 601 |
- NEVER skip algebraic steps. Show moving sides, dividing, and expanding brackets.
|
| 602 |
- ALWAYS explain the mathematical rule/theorem *BEFORE* applying it.
|
| 603 |
* Bad: "נגזור ונשווה לאפס: f'(x) = 2x"
|
| 604 |
* Good: "כדי למצוא נקודת קיצון נגזור את הפונקציה. מכיוון שזו מנה, נשתמש בכלל המנה האומר ש... נגזור את המונה בנפרד ואת המכנה בנפרד:"
|
| 605 |
3. **COMPLETE THE MISSION (ANTI-TRUNCATION):** Never abandon the task. You MUST reach the final numeric/algebraic answer. If asked for extrema, you must find x, y, and classify (max/min).
|
| 606 |
+
4. **ANTI-ASCII-ART RULE (V8.8.8):** NEVER attempt to draw or sketch a graph using text characters, keyboard symbols, slashes, or ASCII art (e.g., do not use |, /, \\, -, _ to make a picture). If asked to sketch a graph, ONLY describe its mathematical properties in text (e.g., intersections, asymptotes).
|
| 607 |
+
5. **UI CONTENT STRATEGY (CRITICAL - MATH SEPARATION & MULTI-STEP LOGIC):**
|
| 608 |
- `content_mixed`: ONLY for your warm Hebrew explanation and short inline variables (e.g. $x=5$). Do NOT put long equations, derivatives, or multi-line steps here!
|
| 609 |
- `block_math`: This is where the ACTUAL CALCULATION goes. It must contain the main equation or algebraic step in PURE LaTeX.
|
| 610 |
- IF there is any mathematical derivation or calculation in a step, it MUST go into `block_math` and NOT into `content_mixed`. NEVER put an equation on a new line inside `content_mixed`.
|
|
|
|
| 621 |
- State factually in `content_mixed`: "אני מזהה סתירה בנתונים, בואו נבדוק שוב את הפונקציה המקורית. על פי החישוב שלי [הסבר קצר...]."
|
| 622 |
- Focus on the TRUTH of your calculation. Never force a derivation to match a (likely misread) OCR error.
|
| 623 |
- **Mathematical Logic (V8.6.6):** If OCR is ambiguous (e.g. $e^x$ vs $e^{-x}$), use the overall context of the question (e.g., domain, asymptotes, or known behavior) to determine the logically correct formula.
|
| 624 |
+
8. **LATEX SYNTAX RULE (V8.6.9 — CRITICAL):**
|
| 625 |
+
- You MUST strictly use LaTeX macros for all mathematical functions.
|
| 626 |
+
- You MUST write \ln (with a backslash) and NEVER just ln.
|
| 627 |
+
- You MUST write \sin, \cos, \tan, \log.
|
| 628 |
+
- Failure to use the backslash will crash the KaTeX renderer and Fail validation.
|
| 629 |
+
9. **ANTI-NEWLINE RULE (V8.6.8):**
|
| 630 |
- In the `final_answer` field, NEVER use `\\` or `\newline` for line breaks.
|
| 631 |
- **BIDI SAFETY:** PROHIBIT Hebrew text (like "או", "וגם") inside `final_answer` or `block_math`.
|
| 632 |
- If there are multiple answers, separate them ONLY with English commas (e.g., "x=1, x=2").
|
| 633 |
+
10. **VARIABLE CONSISTENCY (V8.6.8):**
|
| 634 |
- Always use standard mathematical variables ($x, y, z, m, n$) as provided in context. Never invent new variable names not found in the Data Anchor or original problem.
|
| 635 |
10. **STRATEGY CARD NO-SPOILER RULE (CRITICAL):**
|
| 636 |
- The `strategy_card` MUST NOT contain any numbers, final equations, derivatives, or solutions.
|
|
|
|
| 663 |
- NEVER use data, parameters (like $a=1$), or specific values that explicitly belong to a LATER sub-question (e.g., Section ב') to solve an EARLIER sub-question (e.g., Section א').
|
| 664 |
- Solve earlier sections algebraically using general variables unless the data is part of the global question anchor.
|
| 665 |
- If a student's finding in Section א' is required for Section ב', you may use it, but NEVER the other way around.
|
| 666 |
+
17. **VISUAL GRAPH ANALYSIS (V8.9.1):** When asked to identify a graph from an image containing multiple options (e.g., I, II, III, IV), you MUST explicitly describe what you see in the image for EACH option before making a choice. Base your final selection strictly on matching your mathematical deductions (domain, asymptotes, roots) to the visual features of the graphs in the image. Ensure these descriptions are in `content_mixed` to maintain logical transparency for the student.
|
| 667 |
+
18. **NO GUESSING RULE (V310.0):** אם הנתונים בתמונה אינם מספיקים כדי לקבוע בוודאות איזה גרף מתאים לאזו פונקציה, אל תנחש! הצג את הניתוח המתמטי והסבר מה חסר כדי להגיע להכרעה. ניתן להוסיף: "מומלץ לבחון את הגרף המצורף (אם קיים) כדי לראות את המאפיינים שחישבנו."
|
| 668 |
|
| 669 |
|
| 670 |
+
═══════════════════════════════════════════════════
|
| 671 |
REQUIRED JSON STRUCTURE (EXACT KEYS):
|
| 672 |
+
═══════════════════════════════════════════════════
|
| 673 |
{{
|
| 674 |
{graph_field}
|
| 675 |
"strategy_card": {{
|
|
|
|
| 755 |
🎓 תפקיד: אתה בודקת שיעורי בית — מורה פרטית חמה שבודקת את העבודה של תלמיד.
|
| 756 |
🚫 אתה לא פותר את התרגיל מחדש! אתה מנתח את מה שהתלמיד כתב.
|
| 757 |
|
| 758 |
+
📸 היררכיית תמונות:
|
| 759 |
+
1. התמונה הראשונה (image_00) היא השאלה המקורית מהספר/מבחן.
|
| 760 |
+
2. כל שאר התמונות (image_01 ומעלה) מכילות את שלבי הפתרון שכתב התלמיד בכתב יד.
|
| 761 |
+
|
| 762 |
👤 התלמיד: {student_name}, כיתה {grade}.
|
| 763 |
👑 מגדר: {"נקבה" if student_gender == "F" else "זכר"}. השתמש/י בלשון מתאימה.
|
| 764 |
|
|
|
|
| 884 |
"formulas_to_remember": ["LaTeX נוסחה 1", "LaTeX נוסחה 2"],
|
| 885 |
"tts_speech": "טקסט דיבור עברי נקי ל-TTS"
|
| 886 |
}}
|
| 887 |
+
"""
|
| 888 |
|
| 889 |
+
def get_anchor_validation_prompt(ocr_json: dict) -> str:
    """
    V8.9.2: Dedicated prompt for the Orchestrator's Data Anchor Validator.
    Takes the raw OCR JSON and the image to produce a syntactically perfect 'Absolute Truth'.

    Args:
        ocr_json: Extracted anchor data; serialised with ``json.dumps``
            (2-space indent, non-ASCII kept verbatim) and embedded in the prompt.

    Returns:
        The full validator prompt text.

    Note:
        The template is an f-string, so literal braces and backslashes in the
        prompt text must be escaped (``{{``/``}}`` and ``\\\\``); an unescaped
        ``{name}`` is evaluated as a Python expression.
    """
    ocr_str = json.dumps(ocr_json, indent=2, ensure_ascii=False)

    return f"""
You are a strict Mathematical Transcriber & Validator.
Look at the provided OCR JSON and the original image.

OCR JSON (Raw Extraction):
{ocr_str}

⚠️ MISSION:
The OCR often corrupts complex fractions, missing brackets, or mangling operators.
Your ONLY job is to output a verified, syntactically perfect JSON containing the main mathematical functions and parameters exactly as they appear in the image.

**CRITICAL VISION PROTOCOL:**
You are a strict Visual Validator, NOT a text auto-completer. When evaluating the extracted math, you MUST cross-reference the text directly against the ORIGINAL IMAGE.
If the input string is syntactically broken, truncated, or missing components (e.g., dropped fractions, missing multipliers outside parentheses), DO NOT delete terms to artificially 'fix' the equation.
You MUST visually reconstruct the EXACT mathematical expression pixel-for-pixel as it appears in the image. Do not hallucinate values (e.g., changing $e$ to $\\sqrt{{e}}$). MATCH THE PIXELS EXACTLY.

RULES:
1. Fix any hanging operators (e.g., 'a+', 'x-').
2. Restore missing multipliers or fractions (e.g., if image shows '1/x' but OCR missed it).
3. Ensure all brackets are closed and standard variables are used.
4. If the OCR is correct, keep it as is.
5. CRITICAL: Do NOT solve the problem. Do NOT explain.
6. Output ONLY the corrected JSON following the exact structure of the input.
7. V8.9.2: If you fix a critical syntax error (like 'a+' -> 'a+1/x^2'), ensure the final result is mathematically plausible based on the visual evidence.
8. **HORIZONTAL VERIFICATION (V8.9.3):** Check the horizontal order of elements. If an element is to the left of a bracket in the image, it MUST be to the left of the bracket in your JSON. Do NOT let the Hebrew text flow (RTL) swap the positions of multipliers or terms.
9. **CRITICAL JSON STRUCTURE RULE (V8.9.4):** Every output MUST be a flat JSON dictionary with UNIQUE keys. Do NOT use duplicate keys like "equation" multiple times. If there are multiple equations, use "equation_1", "equation_2", etc.

Return ONLY the corrected JSON.
"""
|
strategy_manager.py
CHANGED
|
@@ -68,7 +68,7 @@ System Prompt Override: הוסף לתחילת ההסבר שלך את ההערה
|
|
| 68 |
"""
|
| 69 |
prompt = soft_recovery_note + "\n" + prompt
|
| 70 |
|
| 71 |
-
prompt
|
| 72 |
llm_response = await self._call_llm(prompt, image_data, category, image_pages, proof_graph_steps_count)
|
| 73 |
|
| 74 |
# V5.8.2: Guard against raw list responses
|
|
@@ -127,7 +127,7 @@ System Prompt Override: הוסף לתחילת ההסבר שלך את ההערה
|
|
| 127 |
async def _call_llm(self, prompt, image_data, category, image_pages, proof_graph_steps_count=1):
|
| 128 |
# V8.5: No More Patchwork. Use centralized V4.3.0 standard.
|
| 129 |
v430_instruction = prompts.get_master_prompt_v430(category=category, problem_text=prompt)
|
| 130 |
-
prompt
|
| 131 |
|
| 132 |
from google.generativeai.types import GenerationConfig
|
| 133 |
import asyncio
|
|
|
|
| 68 |
"""
|
| 69 |
prompt = soft_recovery_note + "\n" + prompt
|
| 70 |
|
| 71 |
+
prompt = (prompt or "") + f"\n\n🎯 MISSION: Solve ONLY part: {problem_text}"
|
| 72 |
llm_response = await self._call_llm(prompt, image_data, category, image_pages, proof_graph_steps_count)
|
| 73 |
|
| 74 |
# V5.8.2: Guard against raw list responses
|
|
|
|
| 127 |
async def _call_llm(self, prompt, image_data, category, image_pages, proof_graph_steps_count=1):
|
| 128 |
# V8.5: No More Patchwork. Use centralized V4.3.0 standard.
|
| 129 |
v430_instruction = prompts.get_master_prompt_v430(category=category, problem_text=prompt)
|
| 130 |
+
prompt = (prompt or "") + (v430_instruction or "")
|
| 131 |
|
| 132 |
from google.generativeai.types import GenerationConfig
|
| 133 |
import asyncio
|
visuals.py
CHANGED
|
@@ -7,8 +7,15 @@ from sympy import sympify, symbols, lambdify, Abs, sin, cos, tan, sqrt, log, ln,
|
|
| 7 |
|
| 8 |
print("✅ 🟢 [BIT-LOG: Visuals V277.0] - Finer Lines + Grid Optimization")
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def sanitize_math_for_sympy(expr_str: str) -> str:
|
| 11 |
if not expr_str: return ""
|
|
|
|
| 12 |
|
| 13 |
# 1. Strip Hebrew
|
| 14 |
expr_str = re.sub(r'[\u0590-\u05FF]', '', expr_str)
|
|
@@ -97,25 +104,31 @@ def _run_with_timeout(func, args, timeout_duration=2.0, default_value=None):
|
|
| 97 |
return default_value
|
| 98 |
|
| 99 |
def generate_plot(latex_input: str, context_text: str = "", geometric_entities: dict = None) -> str:
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
is_implicit = "=" in safe_expr_first or ("x" in safe_expr_first and "y" in safe_expr_first and "**2" in safe_expr_first) or has_real_geo
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
def _plot_func(expr_str):
|
| 121 |
try:
|
|
@@ -145,6 +158,10 @@ def _plot_func(expr_str):
|
|
| 145 |
|
| 146 |
for idx, single_expr in enumerate(expressions):
|
| 147 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
# V8.6.9: Explicit try-except for SYMPY_PARSE_ERROR prevention
|
| 149 |
try:
|
| 150 |
expr = _run_with_timeout(sympify, (single_expr, None, local_dict), timeout_duration=2.0)
|
|
@@ -154,10 +171,16 @@ def _plot_func(expr_str):
|
|
| 154 |
|
| 155 |
if not expr: continue
|
| 156 |
|
| 157 |
-
#
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
f_np = lambdify(x, expr, modules=['numpy'])
|
| 163 |
y_vals = f_np(x_vals)
|
|
|
|
| 7 |
|
| 8 |
print("✅ 🟢 [BIT-LOG: Visuals V277.0] - Finer Lines + Grid Optimization")
|
| 9 |
|
| 10 |
+
def clean_latex_for_sympy(latex_str):
    r"""V318.0: Converts |x| to Abs(x) and \ln to log for SymPy compatibility.

    Sized or named absolute-value delimiters (``\left|``/``\right|``,
    ``\lvert``/``\rvert``) are first reduced to plain ``|`` so they pair up
    like ordinary bars. Pairing is left-to-right and non-greedy, so nested
    absolute values are not supported.

    Args:
        latex_str: LaTeX-ish expression text.

    Returns:
        The text with each ``|...|`` pair rewritten as ``Abs(...)`` and every
        ``\ln`` replaced by ``log``.
    """
    # Normalise delimiter commands before pairing bars; otherwise
    # "\left|x\right|" would become "\leftAbs(x\right)".
    cleaned = re.sub(r'\\(?:left|right)\s*\|', '|', latex_str)
    cleaned = re.sub(r'\\[lr]vert(?![a-zA-Z])', '|', cleaned)
    cleaned = re.sub(r'\|(.*?)\|', r'Abs(\1)', cleaned)
    cleaned = cleaned.replace(r'\ln', 'log')
    return cleaned
|
| 15 |
+
|
| 16 |
def sanitize_math_for_sympy(expr_str: str) -> str:
|
| 17 |
if not expr_str: return ""
|
| 18 |
+
expr_str = clean_latex_for_sympy(expr_str)
|
| 19 |
|
| 20 |
# 1. Strip Hebrew
|
| 21 |
expr_str = re.sub(r'[\u0590-\u05FF]', '', expr_str)
|
|
|
|
| 104 |
return default_value
|
| 105 |
|
| 106 |
def generate_plot(latex_input: str, context_text: str = "", geometric_entities: dict = None) -> str:
    """Route a plot request to the geometric or the function plotter.

    The first comma-separated expression of ``latex_input`` decides the route:
    ``_plot_geo`` is used when that expression (after sanitisation) contains
    ``=``, or contains ``x``, ``y`` and ``**2`` together, or when
    ``geometric_entities`` holds at least one non-empty list value. Otherwise
    the sanitised full input is handed to ``_plot_func``.

    Args:
        latex_input: Expression text, possibly several comma-separated ones.
        context_text: Forwarded to ``_plot_geo``.
        geometric_entities: Optional mapping forwarded to ``_plot_geo``.

    Returns:
        Whatever the selected plotter returns, or ``None`` if anything raises
        (the error and traceback are printed).
    """
    try:
        print(f"📈 [VISUALS] generate_plot called with: latex='{latex_input}', geo_entities={list(geometric_entities.keys()) if geometric_entities else None}")

        # The routing check looks only at the leading expression so secondary
        # graphs cannot trigger the implicit/geometry path.
        leading_raw = latex_input.split(',')[0].strip()
        leading_safe = sanitize_math_for_sympy(leading_raw)

        # The function plotter receives the whole (sanitised) input.
        whole_safe = sanitize_math_for_sympy(latex_input)

        # Smart check: are there really any fully populated geometric entities?
        geo_present = False
        if geometric_entities:
            geo_present = any(
                isinstance(entry, list) and len(entry) > 0
                for entry in geometric_entities.values()
            )

        has_equals = "=" in leading_safe
        looks_quadratic_xy = all(token in leading_safe for token in ("x", "y", "**2"))

        if has_equals or looks_quadratic_xy or geo_present:
            return _plot_geo(leading_safe, context_text, geometric_entities)
        return _plot_func(whole_safe)
    except Exception as e:
        print(f"📈 🔴 [BIT-LOG] CRITICAL PLOT ERROR: {e}")
        import traceback
        traceback.print_exc()
        return None
|
| 132 |
|
| 133 |
def _plot_func(expr_str):
|
| 134 |
try:
|
|
|
|
| 158 |
|
| 159 |
for idx, single_expr in enumerate(expressions):
|
| 160 |
try:
|
| 161 |
+
# V318.0: Robust sanitization within the loop
|
| 162 |
+
single_expr = sanitize_math_for_sympy(single_expr)
|
| 163 |
+
if not single_expr: continue
|
| 164 |
+
|
| 165 |
# V8.6.9: Explicit try-except for SYMPY_PARSE_ERROR prevention
|
| 166 |
try:
|
| 167 |
expr = _run_with_timeout(sympify, (single_expr, None, local_dict), timeout_duration=2.0)
|
|
|
|
| 171 |
|
| 172 |
if not expr: continue
|
| 173 |
|
| 174 |
+
# Ticket 1 Fix: Handle free symbols (parameters like a, b, k)
|
| 175 |
+
# We only want to plot with respect to 'x', so replace everything else with 1
|
| 176 |
+
expr_symbols = expr.free_symbols
|
| 177 |
+
params = [s for s in expr_symbols if s != x]
|
| 178 |
+
if params:
|
| 179 |
+
print(f"🛠️ [VISUALS] Substituting parameters {params} with 1 for sketch.")
|
| 180 |
+
expr = expr.subs({s: 1 for s in params})
|
| 181 |
+
|
| 182 |
+
# Final check if the result is still symbolic
|
| 183 |
+
if not expr.is_finite: continue
|
| 184 |
|
| 185 |
f_np = lambdify(x, expr, modules=['numpy'])
|
| 186 |
y_vals = f_np(x_vals)
|