Spaces:

dotandru
/

BuddyMath

Sleeping

App Files Files Community

BuddyMath / math_sanitizer.py

dotandru

Fix: Clean production deployment with sse-starlette

9d29c62 3 months ago

raw

history blame contribute delete

4.13 kB

	# math_sanitizer.py - V1.1 ProductionMathSanitizer
	import re
	import logging

	logger = logging.getLogger(__name__)

	class ProductionMathSanitizer:
	    """Static helpers that normalize LaTeX text and render proof steps as prompt text."""

	    @staticmethod
	    def normalize_latex(latex_str: str) -> str:
	        """
	        V1.1: Standardizes LaTeX for SymPy and LLM comparison.

	        Steps, in order:
	          1. Strip the string, drop ``\\ `` (escaped space) and turn
	             ``\\times`` / ``\\cdot`` into ``*``.
	          2. Drop ``\\left`` / ``\\right`` in front of round and square
	             brackets and turn every ``{`` / ``}`` into ``(`` / ``)``.
	          3. Rewrite ``\\frac(a)(b)`` as ``(a)/(b)``, including nested fractions.
	          4. Insert ``*`` for implicit multiplication after a digit or a
	             closing parenthesis.

	        Args:
	            latex_str: Raw LaTeX expression; ``None`` or ``""`` is accepted.

	        Returns:
	            The normalized expression, or ``""`` for falsy input. A ``\\frac``
	            whose two arguments cannot be read is left in place unchanged.
	        """
	        if not latex_str:
	            return ""

	        # 1. Basic cleaning
	        clean = latex_str.strip()
	        clean = clean.replace(r'\ ', '')
	        clean = clean.replace(r'\times', '*')
	        clean = clean.replace(r'\cdot', '*')

	        # 2. Bracket normalization (braces become parentheses, e.g. x^{2} -> x^(2))
	        clean = clean.replace(r'\left(', '(').replace(r'\right)', ')')
	        clean = clean.replace(r'\left[', '[').replace(r'\right]', ']')
	        clean = clean.replace('{', '(').replace('}', ')')

	        # 3. Fractions. A bounded scanner replaces the former
	        #    `while r'\frac' in clean` regex loop, which never terminated
	        #    when the pattern failed to match.
	        clean = ProductionMathSanitizer._convert_fractions(clean)

	        # 4. Implicit multiplication guard (V1.1): 2x -> 2*x, 2( -> 2*(, )x -> )*x, )( -> )*(
	        clean = re.sub(r'(\d)([a-zA-Z(])', r'\1*\2', clean)
	        clean = re.sub(r'\)([a-zA-Z0-9(])', r')*\1', clean)

	        return clean

	    @staticmethod
	    def _convert_fractions(expr: str) -> str:
	        """Rewrite every well-formed ``\\frac`` in *expr* as ``(num)/(den)``."""
	        marker = '\\frac'
	        search_end = len(expr)
	        while True:
	            # Rightmost first: nested fractions are converted before the
	            # fraction that contains them.
	            start = expr.rfind(marker, 0, search_end)
	            if start == -1:
	                return expr
	            # Only look further left from now on, so the loop always ends,
	            # even when this occurrence is malformed and left untouched.
	            search_end = start

	            numerator = ProductionMathSanitizer._read_frac_argument(
	                expr, start + len(marker)
	            )
	            if numerator is None:
	                continue
	            denominator = ProductionMathSanitizer._read_frac_argument(
	                expr, numerator[1]
	            )
	            if denominator is None:
	                continue

	            expr = (
	                expr[:start]
	                + f'({numerator[0]})/({denominator[0]})'
	                + expr[denominator[1]:]
	            )

	    @staticmethod
	    def _read_frac_argument(expr: str, pos: int) -> 'tuple[str, int] | None':
	        """Read one fraction argument starting at *pos*.

	        Leading whitespace is skipped. The argument is either a balanced
	        parenthesised group (returned without its outer parentheses) or a
	        single alphanumeric character, as in ``\\frac12``.

	        Returns:
	            ``(argument_text, index_after_argument)``, or ``None`` when no
	            argument can be read (end of string, unbalanced group, or an
	            unsupported leading character).
	        """
	        length = len(expr)
	        while pos < length and expr[pos].isspace():
	            pos += 1
	        if pos >= length:
	            return None

	        head = expr[pos]
	        if head == '(':
	            depth = 0
	            for idx in range(pos, length):
	                char = expr[idx]
	                if char == '(':
	                    depth += 1
	                elif char == ')':
	                    depth -= 1
	                    if depth == 0:
	                        return expr[pos + 1:idx], idx + 1
	            return None  # opening parenthesis is never closed
	        if head.isalnum():
	            return head, pos + 1
	        return None

	    @staticmethod
	    def validate_semantic_completeness(anchor_data: dict, formula_tokens: list[str]) -> bool:
	        """
	        V1.1: Partial Semantic Recovery Check.

	        For now this only checks that at least one of the main equation keys
	        is present in *anchor_data* with a truthy value.

	        Args:
	            anchor_data: Mapping that is checked for the critical keys.
	            formula_tokens: Currently unused; kept so the signature stays stable.

	        Returns:
	            True if ``'function_equations'`` or ``'equations'`` holds a truthy
	            value, False otherwise.
	        """
	        critical_keys = ('function_equations', 'equations')
	        return any(
	            key in anchor_data and anchor_data[key] for key in critical_keys
	        )

	    @staticmethod
	    def get_symbolic_bridge(proof_graph) -> str:
	        """
	        V1.1: Zero Hallucination Bridge.

	        Renders ``proof_graph.steps`` as a framed text block meant to be used
	        as mathematical context for the LLM.

	        Each step contributes one ``Step <id>: <math> (<description>)`` line.
	        A step with a truthy ``allowed_concepts`` attribute also gets an
	        instruction line listing those concepts and its ``pedagogical_tag``
	        (which defaults to 'כללי' when the attribute is missing).

	        Args:
	            proof_graph: Object exposing an iterable ``steps`` attribute; each
	                step must provide ``step_id``, ``math_content`` and
	                ``logic_description``.

	        Returns:
	            The assembled multi-line string, terminated by a newline.
	        """
	        separator = "════════════════════════════════════════\n"
	        parts = [separator, "📜 VERIFIED SYMBOLIC BRIDGE (V1.1):\n", separator]

	        for step in proof_graph.steps:
	            parts.append(
	                f"Step {step.step_id}: {step.math_content} ({step.logic_description or ''})\n"
	            )

	            # V6 Ontology Injection
	            allowed_concepts = getattr(step, 'allowed_concepts', None)
	            if allowed_concepts:
	                concepts_str = ", ".join(allowed_concepts)
	                tag = getattr(step, 'pedagogical_tag', 'כללי')
	                parts.append(
	                    f"For step {step.step_id}, your pedagogical_tag is '{tag}'. You MUST build your explanation using ONLY the concepts from this list: [{concepts_str}]. Do NOT introduce any other mathematical concepts. Keep it under 2 sentences.\n"
	                )

	        parts.append(separator)
	        parts.append("RULE: USE ONLY THE DATA ABOVE. DO NOT HALLUCINATE OR CHANGE MATH.\n")
	        return "".join(parts)

	def sanitize_math_ocr_hotfix(text: str) -> str:
	    """
	    V1.1.1 Aggressive Sanitizer: Removes all spaces and fixes frac regex.
	    Fixes failures caused by leading spaces or visual artifacts.

	    Processing order:
	      1. Remove every ASCII space character.
	      2. Remove the ``\\left`` / ``\\right`` sizing commands. Longer commands
	         that merely start with those letters (``\\leftarrow``,
	         ``\\rightarrow``, ``\\leftrightarrow``) are preserved.
	      3. Rewrite ``frac(a)(b)`` or ``\\frac(a)(b)`` as ``((a)/(b))``. Only
	         arguments without inner parentheses are matched; anything else is
	         left untouched.

	    Args:
	        text: Raw OCR / LaTeX-like text; ``None`` or ``""`` is accepted.

	    Returns:
	        The sanitized text with surrounding whitespace stripped, or ``""``
	        for falsy input.
	    """
	    if not text:
	        return ""

	    # Critical fix: remove all spaces to prevent regex failures
	    # (solution for questions 2 and 3).
	    text = text.replace(" ", "")

	    # Clean up visual leftovers. The lookahead keeps commands such as
	    # \leftarrow / \rightarrow intact; a plain substring replace would
	    # turn them into "arrow".
	    text = re.sub(r"\\(?:left|right)(?![a-zA-Z])", "", text)

	    # Fraction normalization (now runs on a space-free string). The optional
	    # leading backslash is consumed so "\frac(1)(2)" does not leave a stray
	    # "\" in front of the result.
	    text = re.sub(
	        r"\\?frac\(([^()]+)\)\(([^()]+)\)",
	        r"((\1)/(\2))",
	        text,
	    )
	    return text.strip()