# BuddyMath / audio_generator.py
# dotandru's picture
# Fix: Clean production deployment with sse-starlette
# 9d29c62
# audio_generator.py - V273.0 (Google Cloud TTS - High Quality Hebrew)
import asyncio
import base64
import os
import tempfile
import logging
# Configure Logging
# Module-level logger; handlers and levels are left to the application.
logger = logging.getLogger(__name__)
# ═══════════════════════════════════════════════════════════════
# 🎙️ Google Cloud TTS Configuration
# ═══════════════════════════════════════════════════════════════
#
# Available Hebrew voices:
# - he-IL-Wavenet-A (female, high quality) ⭐ recommended
# - he-IL-Wavenet-B (male, high quality)
# - he-IL-Standard-A (female, standard quality)
# - he-IL-Standard-B (male, standard quality)
#
# Free tier: 1 million characters/month (WaveNet: 1M, Standard: 4M)
# ═══════════════════════════════════════════════════════════════
GOOGLE_VOICE_NAME = "he-IL-Wavenet-A"  # Female, high quality
GOOGLE_LANGUAGE_CODE = "he-IL"
SPEAKING_RATE = 0.95  # Slightly slower than default, for clarity
# Intended as "normal pitch".
# NOTE(review): Google Cloud TTS documents AudioConfig.pitch in semitones
# where 0.0 leaves the voice unchanged; 1.0 raises it by one semitone.
# Confirm whether 0.0 was meant.
PITCH = 1.0
# Fallback to edge-tts if Google Cloud is not configured or fails
USE_EDGE_TTS_FALLBACK = True
EDGE_TTS_VOICE = "he-IL-HilaNeural"
from firebase_manager import firebase_manager # V261.17
def _is_google_cloud_configured() -> bool:
    """Report whether Google Cloud credentials can be located.

    Returns True immediately when GOOGLE_APPLICATION_CREDENTIALS is already
    set to a non-empty value. Otherwise a few well-known file locations are
    probed in order; the first one that exists is exported through that
    environment variable (a process-wide side effect) and True is returned.
    Returns False when nothing is found.
    """
    env_key = "GOOGLE_APPLICATION_CREDENTIALS"

    # Option 1: credentials already supplied through the environment.
    if os.environ.get(env_key):
        return True

    # Option 2: look for a credentials file in common locations.
    candidates = (
        "/app/google-credentials.json",
        "./google-credentials.json",
        os.path.expanduser("~/.config/gcloud/application_default_credentials.json"),
    )
    located = next(
        (candidate for candidate in candidates if os.path.exists(candidate)),
        None,
    )
    if located is None:
        return False

    os.environ[env_key] = located
    return True
async def _generate_with_google_cloud(text: str, output_path: str) -> bool:
    """Synthesize *text* to an MP3 file with Google Cloud Text-to-Speech.

    The voice, language, speaking rate and pitch come from the module-level
    constants. The synthesis call itself runs in the default executor so the
    event loop is not blocked while waiting for the service.

    Returns:
        True when the audio was written to *output_path*; False when the
        client library is not installed or any step raised.
    """
    try:
        from google.cloud import texttospeech

        tts_client = texttospeech.TextToSpeechClient()

        # Which voice to use.
        voice_params = texttospeech.VoiceSelectionParams(
            language_code=GOOGLE_LANGUAGE_CODE,
            name=GOOGLE_VOICE_NAME,
        )
        # Output format and prosody.
        mp3_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=SPEAKING_RATE,
            pitch=PITCH,
        )
        # What to say.
        request_input = texttospeech.SynthesisInput(text=text)

        def _synthesize():
            # Blocking client call; executed off the event loop below.
            return tts_client.synthesize_speech(
                input=request_input,
                voice=voice_params,
                audio_config=mp3_config,
            )

        logger.info(f"🎙️ Google Cloud TTS: Generating audio for {len(text)} chars...")
        event_loop = asyncio.get_running_loop()
        tts_response = await event_loop.run_in_executor(None, _synthesize)

        # Persist the returned MP3 bytes.
        with open(output_path, "wb") as mp3_file:
            mp3_file.write(tts_response.audio_content)

        logger.info(f"✅ Google Cloud TTS: Audio saved to {output_path}")
        return True
    except ImportError:
        logger.warning("⚠️ google-cloud-texttospeech not installed. Run: pip install google-cloud-texttospeech")
        return False
    except Exception as err:
        logger.error(f"❌ Google Cloud TTS failed: {err}")
        return False
async def _generate_with_edge_tts(text: str, output_path: str) -> bool:
    """Synthesize *text* to *output_path* with edge-tts (the fallback engine).

    Uses the voice named by EDGE_TTS_VOICE.

    Returns:
        True when the audio file was saved; False on any failure, including
        edge-tts not being installed.
    """
    try:
        import edge_tts

        logger.info("🎙️ Edge TTS (Fallback): Generating audio...")
        await edge_tts.Communicate(text, EDGE_TTS_VOICE).save(output_path)
        logger.info(f"✅ Edge TTS: Audio saved to {output_path}")
    except Exception as err:
        logger.error(f"❌ Edge TTS failed: {err}")
        return False
    return True
async def generate_teacher_audio(text: str, output_path: str = None) -> str:
    """Synthesize speech for *text* and return a reference to the audio.

    V273.0: Google Cloud TTS is tried first when credentials are available.
    If it is not configured or fails, edge-tts is used when
    USE_EDGE_TTS_FALLBACK is enabled. The resulting MP3 is uploaded through
    ``firebase_manager.upload_file``; if that returns nothing or raises, the
    file contents are returned Base64-encoded instead.

    Args:
        text: Text to speak. It is passed through ``_clean_text_for_tts``
            before synthesis.
        output_path: Optional destination for the MP3. When given, the file
            is kept on disk (its parent directory is created if needed).
            When omitted, a uniquely named temporary file is used and removed
            before this function returns.

    Returns:
        - Public URL (if the Firebase upload succeeded)
        - Base64 string of the MP3 (fallback)
        - None (empty text, or every step failed)
    """
    # A falsy output_path means we own a temp file that must not outlive the call.
    uses_temp_file = not output_path
    final_path = output_path
    try:
        if not text:
            return None

        # Clean text for TTS (remove emojis and special chars that cause issues)
        clean_text = _clean_text_for_tts(text)
        if not clean_text:
            return None

        logger.info(f"🎙️ TTS Request: {clean_text[:50]}...")

        # Determine output path
        if uses_temp_file:
            # mkstemp yields a unique name, so concurrent requests (and the
            # Firebase blob names derived from the basename) cannot collide.
            fd, final_path = tempfile.mkstemp(prefix="audio_", suffix=".mp3")
            os.close(fd)
        else:
            # dirname is "" for a bare filename, and os.makedirs("") raises.
            parent_dir = os.path.dirname(output_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

        # Try Google Cloud TTS first
        success = False
        if _is_google_cloud_configured():
            success = await _generate_with_google_cloud(clean_text, final_path)
        else:
            logger.info("ℹ️ Google Cloud not configured, using Edge TTS")

        # Fallback to edge-tts
        if not success and USE_EDGE_TTS_FALLBACK:
            success = await _generate_with_edge_tts(clean_text, final_path)

        if not success:
            logger.error("❌ All TTS methods failed")
            return None

        # Try Firebase upload; any failure here degrades to Base64 below.
        try:
            blob_name = f"audio/{os.path.basename(final_path)}"
            loop = asyncio.get_running_loop()
            public_url = await loop.run_in_executor(
                None,
                lambda: firebase_manager.upload_file(final_path, blob_name),
            )
            if public_url:
                logger.info(f"☁️ Firebase URL: {public_url}")
                return public_url
        except Exception as fb_err:
            logger.warning(f"⚠️ Firebase upload failed ({fb_err}). Using Base64.")

        # Fallback: return the audio inline as Base64
        with open(final_path, "rb") as audio_file:
            return base64.b64encode(audio_file.read()).decode("utf-8")

    except Exception as e:
        logger.error(f"❌ TTS Generation Failed: {e}")
        return None
    finally:
        # Remove the temp file on every exit path (success, failure, error).
        if uses_temp_file and final_path:
            try:
                os.remove(final_path)
            except FileNotFoundError:
                pass  # nothing was left behind
            except OSError as cleanup_err:
                logger.warning(
                    f"⚠️ Could not remove temp audio file {final_path}: {cleanup_err}"
                )
def _clean_text_for_tts(text: str) -> str:
    """Prepare *text* for speech synthesis.

    Removes emoji and related symbols, drops ``$`` and backslash characters
    (LaTeX remnants), collapses every whitespace run to a single space and
    trims both ends.

    Args:
        text: Raw text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned text, or ``""`` when *text* is falsy.
    """
    import re  # kept function-local, as in the original

    if not text:
        return ""

    # Remove emojis
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags
        "\U0001F900-\U0001FAFF"  # supplemental symbols & extended pictographs
        "\U00002702-\U000027B0"  # dingbats
        # NOTE(review): this range is very broad. Besides enclosed characters
        # it also spans CJK and the Hebrew presentation forms U+FB1D-U+FB4F.
        # Kept as-is for backward compatibility.
        "\U000024C2-\U0001F251"
        "\u200D"  # zero-width joiner left over from emoji sequences
        "]+",
        flags=re.UNICODE,
    )
    clean = emoji_pattern.sub('', text)

    # Remove LaTeX remnants that might have slipped through
    clean = clean.replace('$', '').replace('\\', '')

    # Collapse whitespace last, so gaps opened by the removals above do not
    # survive as double spaces.
    clean = re.sub(r'\s+', ' ', clean)
    return clean.strip()
# ═══════════════════════════════════════════════════════════════
# 🧪 Testing
# ═══════════════════════════════════════════════════════════════
if __name__ == "__main__":
    async def main():
        """Manual smoke test: synthesize a short Hebrew sample and report the result."""
        # The sample's continuation lines sit at column 0 so the literal is
        # exactly the text as it appears in the source.
        sample_text = """
איזה יופי של תרגיל! היינו צריכים למצוא את נקודות הקיצון של הפונקציה.
השתמשנו בנגזרת ראשונה כדי למצוא איפה השיפוע מתאפס.
הטריק לזכור - נגזרת אפס תמיד מסמנת נקודת קיצון אפשרית.
כל הכבוד על ההתמדה!
"""
        print("🎙️ Testing TTS...")
        print(f"📝 Text length: {len(sample_text)} chars")
        print(f"☁️ Google Cloud configured: {_is_google_cloud_configured()}")

        outcome = await generate_teacher_audio(sample_text)

        # The result is either a URL, a Base64 payload, or None.
        if not outcome:
            print("❌ TTS failed")
        elif outcome.startswith("http"):
            print(f"✅ Got URL: {outcome}")
        else:
            print(f"✅ Got Base64: {len(outcome)} chars")

    asyncio.run(main())