Spaces:

dotandru
/

BuddyMath

Sleeping

App Files Files Community

BuddyMath / tests /verify_core_v11.py

dotandru

Fix: Clean production deployment with sse-starlette

9d29c62 3 months ago

raw

history blame

3.65 kB

	import json
	import asyncio
	import numpy as np
	from ocr_strip_engine import get_best_sniper_roi, apply_conditional_homography
	from proof_graph import ProofGraph, ProofStep
	from math_sanitizer import ProductionMathSanitizer
	from pedagogical_builder import build_pedagogical_response

	def _verify_vision_layer() -> None:
	    """Layer 1 check: ROI selection and conditional homography on a synthetic image.

	    Raises:
	        AssertionError: if the ROI confidence is not above 0.1 or the
	            homography result has a zero-length first dimension.
	    """
	    print("\n👁️ Testing Layer 1: Vision Architecture")
	    mock_img = np.zeros((1000, 1000, 3), dtype=np.uint8)
	    # One white rectangle on a black canvas stands in for a dense math block.
	    mock_img[100:150, 200:400] = 255

	    roi, confidence = get_best_sniper_roi(mock_img)
	    print(f" - ROI Confidence (Heatmap Prior): {confidence:.2f}")
	    assert confidence > 0.1, "Heatmap prior scoring failure"

	    homography_result = apply_conditional_homography(roi)
	    print(" - Conditional Homography applied successfully.")
	    assert homography_result.shape[0] > 0, "Homography returned empty image"


	def _verify_math_safety_lock() -> None:
	    """Layer 2 check: symbolic bridge generation and LaTeX fraction normalization.

	    Raises:
	        AssertionError: if the bridge text lacks the expected marker or the
	            first proof step, or if the fraction is not normalized to "(x)/(2)".
	    """
	    print("\n🔒 Testing Layer 2: Math Safety Lock")
	    proof_steps = [
	        ProofStep(1, "f(x) = x^2", "Initial Function"),
	        ProofStep(2, "f'(x) = 2x", "Derivative Step"),
	    ]
	    pg = ProofGraph(proof_steps)
	    bridge = ProductionMathSanitizer.get_symbolic_bridge(pg)
	    print(" - Symbolic Bridge generated:")
	    # The preview is cosmetic. Guard the index so a bridge with fewer than
	    # four lines reaches the assertions below instead of dying on IndexError.
	    bridge_lines = bridge.splitlines()
	    preview = bridge_lines[3] if len(bridge_lines) > 3 else ""
	    print(f" {preview}")  # Show one line
	    assert "VERIFIED SYMBOLIC BRIDGE" in bridge, (
	        "Symbolic bridge is missing the 'VERIFIED SYMBOLIC BRIDGE' marker"
	    )
	    assert "f(x) = x^2" in bridge, "Symbolic bridge is missing the initial proof step"

	    sanitized = ProductionMathSanitizer.normalize_latex(r"\frac{x}{2}")
	    print(f" - Math Sanitizer (Fraction): {sanitized}")
	    assert sanitized == "(x)/(2)", f"Unexpected fraction normalization: {sanitized!r}"


	def _count_disclosure(steps, state: str) -> int:
	    """Return how many step dicts in *steps* have ``disclosure_state`` equal to *state*."""
	    return sum(1 for step in steps if step.get("disclosure_state") == state)


	def _verify_pedagogical_engine() -> None:
	    """Layer 3 check: step disclosure produced by ``build_pedagogical_response``.

	    Raises:
	        AssertionError: if the template-style payload yields more than three
	            steps marked VISIBLE.
	    """
	    print("\n🎓 Testing Layer 3: Pedagogical Engine")

	    # Scenario A: payload that carries "solution_markdown".
	    # Informational only. The original author's note says such payloads may
	    # take a priority-bypass path that returns a single step, so the counts
	    # are printed but not asserted. TODO: confirm against the builder.
	    mock_llm_output = {
	        "solution_markdown": "Test solution with steps.",
	        "confidence_score": 0.9,
	        "sections": [
	            {
	                "title": "Steps",
	                "steps": [
	                    {"title": "Step 1"}, {"title": "Step 2"},
	                    {"title": "Step 3"}, {"title": "Step 4"},
	                ],
	            }
	        ],
	    }
	    response = build_pedagogical_response("GENERAL", mock_llm_output, {})
	    steps = response["sections"][0]["steps"]
	    print(f" - Cognitive Load Limiter (Step Count): {len(steps)}")

	    visible_count = _count_disclosure(steps, "VISIBLE")
	    hidden_count = _count_disclosure(steps, "HIDDEN")
	    print(f" - Visible: {visible_count}, Hidden: {hidden_count}")

	    # Scenario B: template-style payload (no "solution_markdown") with five
	    # steps. This is the case on which the visible-step cap is asserted.
	    mock_template_output = {
	        "confidence_score": 0.9,
	        "sections": [
	            {
	                "title": "Steps",
	                "steps": [{"t": 1}, {"t": 2}, {"t": 3}, {"t": 4}, {"t": 5}],
	            }
	        ],
	    }
	    response2 = build_pedagogical_response("LINEAR_EQUATION", mock_template_output, {})
	    steps2 = response2["sections"][0]["steps"]
	    visible_count2 = _count_disclosure(steps2, "VISIBLE")
	    print(f" - Template disclosure: {visible_count2} visible")
	    assert visible_count2 <= 3, "Cognitive Load Limiter failed to hide steps"


	async def verify_core_v11() -> None:
	    """Run the BuddyMath Core V1.1 verification checks in order.

	    Runs the vision, math-safety and pedagogical layer checks one after
	    another and prints progress as it goes. Nothing is awaited; the
	    function stays a coroutine so it can still be driven by ``asyncio.run``.

	    Raises:
	        AssertionError: on the first failed check. The checks rely on
	            ``assert``, so they are skipped when Python runs with ``-O``.
	    """
	    print("🚀 Starting BuddyMath Core V1.1 Verification Suite")

	    _verify_vision_layer()
	    _verify_math_safety_lock()
	    _verify_pedagogical_engine()

	    print("\n🏆 BuddyMath Core V1.1 Verified!")

	if __name__ == "__main__":
	    # Script entry point: build the suite coroutine, then run it to
	    # completion on a fresh event loop.
	    _suite = verify_core_v11()
	    asyncio.run(_suite)