Spaces:

dotandru
/

BuddyMath

Sleeping

App Files Files Community

BuddyMath / audio_generator.py

dotandru

Fix: Clean production deployment with sse-starlette

9d29c62 3 months ago

raw

history blame contribute delete

9.64 kB

	# audio_generator.py - V273.0 (Google Cloud TTS - High Quality Hebrew)
	import asyncio
	import base64
	import os
	import tempfile
	import logging

	# Configure Logging
	logger = logging.getLogger(__name__)

	# ═══════════════════════════════════════════════════════════════
	# 🎙️ Google Cloud TTS Configuration
	# ═══════════════════════════════════════════════════════════════
	#
	# Available Hebrew voices:
	# - he-IL-Wavenet-A (female, high quality) ⭐ recommended
	# - he-IL-Wavenet-B (male, high quality)
	# - he-IL-Standard-A (female, standard quality)
	# - he-IL-Standard-B (male, standard quality)
	#
	# Free tier (characters per month): WaveNet 1M, Standard 4M
	# ═══════════════════════════════════════════════════════════════

	# Voice settings passed to Google Cloud TTS by _generate_with_google_cloud.
	GOOGLE_VOICE_NAME = "he-IL-Wavenet-A" # Female, high quality
	GOOGLE_LANGUAGE_CODE = "he-IL"
	SPEAKING_RATE = 0.95 # slightly slower than default, for clarity
	# NOTE(review): the original comment calls this the "normal" pitch, but the
	# Google Cloud TTS docs describe pitch in semitones with 0.0 meaning
	# unchanged, so 1.0 would raise the voice by one semitone - confirm intent.
	PITCH = 1.0 # voice pitch

	# Fallback to edge-tts if Google Cloud is not configured or fails
	USE_EDGE_TTS_FALLBACK = True
	EDGE_TTS_VOICE = "he-IL-HilaNeural"

	from firebase_manager import firebase_manager # V261.17


	def _is_google_cloud_configured() -> bool:
	    """
	    Report whether Google Cloud credentials can be located.

	    Returns True immediately when GOOGLE_APPLICATION_CREDENTIALS is set to a
	    non-empty value. Otherwise a fixed list of candidate credential files is
	    probed in order; the first path that exists is exported through
	    GOOGLE_APPLICATION_CREDENTIALS and True is returned. Returns False when
	    none of them exists.
	    """
	    env_key = "GOOGLE_APPLICATION_CREDENTIALS"

	    # An explicit setting always wins and is never overwritten.
	    if os.environ.get(env_key):
	        return True

	    candidates = (
	        "/app/google-credentials.json",
	        "./google-credentials.json",
	        os.path.expanduser("~/.config/gcloud/application_default_credentials.json"),
	    )
	    found = next((p for p in candidates if os.path.exists(p)), None)
	    if found is None:
	        return False

	    # Side effect: publish the discovered file for the Google client library.
	    os.environ[env_key] = found
	    return True


	async def _generate_with_google_cloud(text: str, output_path: str) -> bool:
	    """
	    Synthesize ``text`` to an MP3 file with Google Cloud Text-to-Speech.

	    The voice is taken from the module-level settings GOOGLE_LANGUAGE_CODE,
	    GOOGLE_VOICE_NAME, SPEAKING_RATE and PITCH. All synchronous work (client
	    construction, the synthesis request and the file write) runs in the
	    default executor so the event loop is not held up by it.

	    Args:
	        text: Text to synthesize.
	        output_path: File path the MP3 bytes are written to.

	    Returns:
	        True if the audio was written to ``output_path``; False if an
	        ImportError occurred (package not installed) or any other step raised.
	    """
	    try:
	        from google.cloud import texttospeech

	        def _synthesize_to_file() -> None:
	            """Blocking part: build the client, request the audio, save it."""
	            # Client construction resolves credentials and may do blocking
	            # I/O, so it belongs in the worker thread as well.
	            client = texttospeech.TextToSpeechClient()

	            voice = texttospeech.VoiceSelectionParams(
	                language_code=GOOGLE_LANGUAGE_CODE,
	                name=GOOGLE_VOICE_NAME,
	            )
	            audio_config = texttospeech.AudioConfig(
	                audio_encoding=texttospeech.AudioEncoding.MP3,
	                speaking_rate=SPEAKING_RATE,
	                pitch=PITCH,
	            )
	            synthesis_input = texttospeech.SynthesisInput(text=text)

	            response = client.synthesize_speech(
	                input=synthesis_input,
	                voice=voice,
	                audio_config=audio_config,
	            )

	            with open(output_path, "wb") as out:
	                out.write(response.audio_content)

	        logger.info(f"🎙️ Google Cloud TTS: Generating audio for {len(text)} chars...")

	        # Exceptions raised in the worker are re-raised here by the await
	        # and handled by the except clauses below.
	        loop = asyncio.get_running_loop()
	        await loop.run_in_executor(None, _synthesize_to_file)

	        logger.info(f"✅ Google Cloud TTS: Audio saved to {output_path}")
	        return True

	    except ImportError:
	        logger.warning("⚠️ google-cloud-texttospeech not installed. Run: pip install google-cloud-texttospeech")
	        return False
	    except Exception as e:
	        logger.error(f"❌ Google Cloud TTS failed: {e}")
	        return False


	async def _generate_with_edge_tts(text: str, output_path: str) -> bool:
	    """
	    Fallback synthesizer: save ``text`` as audio at ``output_path`` via edge-tts.

	    The voice is EDGE_TTS_VOICE. Any exception, including a failed import of
	    edge_tts, is logged and reported as False; True means the save call
	    completed without raising.
	    """
	    try:
	        # Imported lazily so the module loads even without edge-tts installed.
	        import edge_tts

	        logger.info("🎙️ Edge TTS (Fallback): Generating audio...")
	        await edge_tts.Communicate(text, EDGE_TTS_VOICE).save(output_path)

	        logger.info(f"✅ Edge TTS: Audio saved to {output_path}")
	        return True
	    except Exception as err:
	        logger.error(f"❌ Edge TTS failed: {err}")
	        return False


	async def generate_teacher_audio(text: str, output_path: str = None) -> str:
	    """
	    Synthesize speech for ``text`` and return it as a URL or a Base64 string.

	    Google Cloud TTS is tried first when credentials are found. If it is not
	    configured or fails, edge-tts is used when USE_EDGE_TTS_FALLBACK is set.
	    The resulting MP3 is uploaded with ``firebase_manager.upload_file``; if
	    the upload raises or returns a falsy value, the file content is returned
	    Base64-encoded instead.

	    Args:
	        text: Text to speak. It is passed through _clean_text_for_tts first.
	        output_path: Optional destination for the MP3. When given, the file is
	            left on disk. When omitted, a uniquely named temporary file is
	            used and always removed before returning.

	    Returns:
	        The URL returned by the Firebase upload, or the Base64-encoded MP3,
	        or None when the text is empty (before or after cleaning) or every
	        TTS method failed or an unexpected error occurred.
	    """
	    import time
	    import uuid

	    if not text:
	        return None

	    is_temp_file = not output_path
	    final_path = None
	    try:
	        # Clean text for TTS (remove emojis and special chars that cause issues)
	        clean_text = _clean_text_for_tts(text)
	        if not clean_text:
	            return None

	        logger.info(f"🎙️ TTS Request: {clean_text[:50]}...")

	        if output_path:
	            # dirname is "" for a bare file name and os.makedirs("") raises,
	            # so only create the directory when there is one.
	            target_dir = os.path.dirname(output_path)
	            if target_dir:
	                os.makedirs(target_dir, exist_ok=True)
	            final_path = output_path
	        else:
	            # Wall-clock milliseconds plus a random suffix: the file name is
	            # also used as the Firebase blob name, so it must not collide
	            # between concurrent requests.
	            timestamp = int(time.time() * 1000)
	            final_path = os.path.join(
	                tempfile.gettempdir(),
	                f"audio_{timestamp}_{uuid.uuid4().hex[:8]}.mp3",
	            )

	        # Try Google Cloud TTS first
	        success = False
	        if _is_google_cloud_configured():
	            success = await _generate_with_google_cloud(clean_text, final_path)
	        else:
	            logger.info("ℹ️ Google Cloud not configured, using Edge TTS")

	        # Fallback to edge-tts
	        if not success and USE_EDGE_TTS_FALLBACK:
	            success = await _generate_with_edge_tts(clean_text, final_path)

	        if not success:
	            logger.error("❌ All TTS methods failed")
	            return None

	        # Try Firebase upload; any failure here degrades to Base64 below.
	        try:
	            blob_name = f"audio/{os.path.basename(final_path)}"
	            loop = asyncio.get_running_loop()
	            public_url = await loop.run_in_executor(
	                None, firebase_manager.upload_file, final_path, blob_name
	            )
	            if public_url:
	                logger.info(f"☁️ Firebase URL: {public_url}")
	                return public_url
	        except Exception as fb_err:
	            logger.warning(f"⚠️ Firebase upload failed ({fb_err}). Using Base64.")

	        # Fallback: return the audio itself as Base64 text
	        with open(final_path, "rb") as audio_file:
	            return base64.b64encode(audio_file.read()).decode("utf-8")

	    except Exception as e:
	        logger.error(f"❌ TTS Generation Failed: {e}")
	        return None
	    finally:
	        # Remove the temporary file on every exit path (success or failure);
	        # a caller-supplied output_path is never deleted.
	        if is_temp_file and final_path and os.path.exists(final_path):
	            try:
	                os.remove(final_path)
	            except OSError as cleanup_err:
	                logger.warning(f"⚠️ Could not remove temp audio file {final_path}: {cleanup_err}")


	def _clean_text_for_tts(text: str) -> str:
	    """
	    Prepare text for TTS by stripping emoji and leftover LaTeX markers.

	    Steps, in order: remove emoji and pictographic symbols, drop dollar signs
	    and backslashes, collapse every whitespace run to a single space, and
	    trim both ends.

	    Args:
	        text: Raw text; a falsy value yields "".

	    Returns:
	        The cleaned text, possibly empty.
	    """
	    import re

	    if not text:
	        return ""

	    # Only emoji-bearing ranges are listed. One wide range such as
	    # U+24C2-U+1F251 would also delete CJK, Hangul and the Hebrew
	    # presentation forms (U+FB1D-U+FB4F), i.e. real speakable text.
	    emoji_pattern = re.compile(
	        "["
	        "\U0001F000-\U0001FAFF"  # supplementary-plane emoji, flags, pictographs
	        "\u2600-\u27BF"  # miscellaneous symbols and dingbats
	        "\u2B00-\u2BFF"  # stars, circles and arrows used as emoji
	        "\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE"  # geometric-shape emoji
	        "\u24C2\u2934\u2935\u3030\u303D\u3297\u3299"  # individual BMP emoji
	        "\uFE0F"  # emoji variation selector left behind by the above
	        "]+"
	    )
	    clean = emoji_pattern.sub("", text)

	    # Remove LaTeX remnants first, so the gaps they leave are collapsed below.
	    clean = clean.replace("$", "").replace("\\", "")

	    # Collapse whitespace runs (including newlines) to single spaces.
	    clean = re.sub(r"\s+", " ", clean)

	    return clean.strip()


	# ═══════════════════════════════════════════════════════════════
	# 🧪 Testing
	# ═══════════════════════════════════════════════════════════════

	if __name__ == "__main__":
	    async def main():
	        """Manual smoke test: synthesize a sample Hebrew text and print the outcome."""
	        sample_text = """
	איזה יופי של תרגיל! היינו צריכים למצוא את נקודות הקיצון של הפונקציה.
	השתמשנו בנגזרת ראשונה כדי למצוא איפה השיפוע מתאפס.
	הטריק לזכור - נגזרת אפס תמיד מסמנת נקודת קיצון אפשרית.
	כל הכבוד על ההתמדה!
	"""

	        print("🎙️ Testing TTS...")
	        print(f"📝 Text length: {len(sample_text)} chars")
	        print(f"☁️ Google Cloud configured: {_is_google_cloud_configured()}")

	        audio = await generate_teacher_audio(sample_text)

	        # A result is either a public URL or a Base64 payload; None means failure.
	        if not audio:
	            print("❌ TTS failed")
	        elif audio.startswith("http"):
	            print(f"✅ Got URL: {audio}")
	        else:
	            print(f"✅ Got Base64: {len(audio)} chars")

	    asyncio.run(main())