| |
| import re |
| import logging |
|
|
| logger = logging.getLogger(__name__) |
|
|
class ProductionMathSanitizer:
    """Stateless helpers for LaTeX normalization and LLM prompt context."""

    # \frac with two parenthesized arguments. Braces have already been
    # rewritten to parentheses by the time this runs; whitespace between the
    # two arguments is tolerated.
    _FRAC_GROUPED = re.compile(r'\\frac\s*\((.*?)\)\s*\((.*?)\)')
    # \frac followed by two bare single-character arguments, e.g. \frac12.
    _FRAC_BARE = re.compile(r'\\frac\s*([A-Za-z0-9])\s*([A-Za-z0-9])')

    @staticmethod
    def normalize_latex(latex_str: str) -> str:
        """
        V1.1: Standardizes LaTeX for SymPy and LLM comparison.

        Steps, in order:
          1. Strip surrounding whitespace and drop the ``\\ `` spacing command.
          2. Rewrite ``\\times`` and ``\\cdot`` as ``*``.
          3. Drop ``\\left`` / ``\\right`` in front of ``()`` and ``[]``, then
             turn every brace into the matching parenthesis.
          4. Rewrite ``\\frac(a)(b)`` (and bare ``\\frac12``) as ``(a)/(b)``.
          5. Insert explicit ``*`` for implicit multiplication after a digit
             or after a closing parenthesis.

        Args:
            latex_str: Raw LaTeX expression; falsy values yield ``""``.

        Returns:
            The normalized expression. A ``\\frac`` that matches neither
            supported form is left in place untouched.
        """
        if not latex_str:
            return ""

        clean = latex_str.strip()
        clean = clean.replace(r'\ ', '')
        clean = clean.replace(r'\times', '*')
        clean = clean.replace(r'\cdot', '*')

        clean = clean.replace(r'\left(', '(').replace(r'\right)', ')')
        clean = clean.replace(r'\left[', '[').replace(r'\right]', ']')
        clean = clean.replace('{', '(').replace('}', ')')

        grouped = ProductionMathSanitizer._FRAC_GROUPED
        bare = ProductionMathSanitizer._FRAC_BARE
        # Each successful pass removes at least one \frac, so nested fractions
        # are peeled one layer per iteration. The loop stops as soon as a pass
        # makes no progress, so an unparseable \frac cannot spin forever.
        while r'\frac' in clean:
            rewritten = grouped.sub(r'(\1)/(\2)', clean)
            if rewritten == clean:
                rewritten = bare.sub(r'(\1)/(\2)', clean)
            if rewritten == clean:
                break
            clean = rewritten

        # Implicit multiplication: "2x" -> "2*x", "3(" -> "3*(", ")x" -> ")*x".
        clean = re.sub(r'(\d)([a-zA-Z(])', r'\1*\2', clean)
        clean = re.sub(r'\)([a-zA-Z0-9(])', r')*\1', clean)

        return clean

    @staticmethod
    def validate_semantic_completeness(anchor_data: dict, formula_tokens: list[str]) -> bool:
        """
        V1.1: Partial Semantic Recovery Check.

        Args:
            anchor_data: Mapping inspected for the critical keys
                ``'function_equations'`` and ``'equations'``.
            formula_tokens: Currently unused; kept so the call signature
                stays stable for existing callers.

        Returns:
            True when at least one critical key is present with a truthy
            value, False otherwise (including when ``anchor_data`` is empty
            or None).
        """
        if not anchor_data:
            return False

        critical_keys = ('function_equations', 'equations')
        return any(anchor_data.get(key) for key in critical_keys)

    @staticmethod
    def get_symbolic_bridge(proof_graph) -> str:
        """
        V1.1: Zero Hallucination Bridge.
        Converts the Immutable ProofGraph to a clean mathematical context for the LLM.

        Args:
            proof_graph: Object exposing an iterable ``steps``; each step must
                provide ``step_id``, ``math_content`` and
                ``logic_description``, and may provide ``allowed_concepts``
                and ``pedagogical_tag``.

        Returns:
            A newline-terminated text block: a header, one line per step
            (plus a constraint line for steps with a non-empty
            ``allowed_concepts``), and a closing rule line.
        """
        # NOTE(review): the separator, the header prefix and the default tag
        # below look like UTF-8 text that was decoded with the wrong codec
        # (probably a box-drawing rule, an emoji and a Hebrew word). They are
        # reproduced as found because they are runtime output -- confirm the
        # intended characters against the file's real encoding.
        parts = [
            "鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲\n",
            "馃摐 VERIFIED SYMBOLIC BRIDGE (V1.1):\n",
            "鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲\n",
        ]

        for step in proof_graph.steps:
            parts.append(
                f"Step {step.step_id}: {step.math_content} ({step.logic_description or ''})\n"
            )

            # Only steps that carry a non-empty concept list get the extra
            # constraint line.
            concepts = getattr(step, 'allowed_concepts', None)
            if concepts:
                concepts_str = ", ".join(concepts)
                tag = getattr(step, 'pedagogical_tag', '讻诇诇讬')
                parts.append(
                    f"For step {step.step_id}, your pedagogical_tag is '{tag}'. "
                    f"You MUST build your explanation using ONLY the concepts "
                    f"from this list: [{concepts_str}]. Do NOT introduce any "
                    f"other mathematical concepts. Keep it under 2 sentences.\n"
                )

        parts.append(
            "鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲鈺愨晲\n"
        )
        parts.append("RULE: USE ONLY THE DATA ABOVE. DO NOT HALLUCINATE OR CHANGE MATH.\n")
        return "".join(parts)
|
|
def sanitize_math_ocr_hotfix(text: str) -> str:
    """
    V1.1.1 Aggressive Sanitizer: Removes all spaces and fixes frac regex.
    Fixes failures caused by leading spaces or visual artifacts.

    Steps, in order:
      1. Remove every ASCII space character.
      2. Drop the ``\\left`` / ``\\right`` sizing commands. Longer commands
         that merely start with those letters (``\\leftarrow``,
         ``\\rightarrow``, ...) are left intact.
      3. Rewrite ``frac(a)(b)`` -- with or without its leading backslash --
         as ``((a)/(b))``. Arguments containing parentheses are not matched,
         and the substitution is a single pass, so in a nested fraction only
         the innermost one is rewritten.
      4. Strip any remaining leading/trailing whitespace.

    Args:
        text: Raw expression text; falsy values yield ``""``.

    Returns:
        The sanitized expression.
    """
    if not text:
        return ""

    text = text.replace(" ", "")

    # The negative lookahead keeps \leftarrow, \rightarrow, \leftrightarrow
    # and similar commands from being truncated to "arrow".
    text = re.sub(r"\\(?:left|right)(?![A-Za-z])", "", text)

    # The optional backslash is consumed so that "\frac(a)(b)" does not leave
    # a stray "\" in front of the rewritten fraction.
    text = re.sub(
        r"\\?frac\(([^()]+)\)\(([^()]+)\)",
        r"((\1)/(\2))",
        text,
    )
    return text.strip()
|
|