# Captured from a Hugging Face Spaces page (Space status at capture time: Running).
"""
Enhanced DNA-Diffusion Gradio Application
With scientific tools, analysis features, and LLM chat integration
"""
import asyncio
import html
import json
import logging
import os
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import aiohttp
import gradio as gr
import numpy as np
import requests
# Logging setup: INFO level, records rendered as
# "<time> - <logger name> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
# Try to import `spaces` for GPU decoration; when it is not installed, install
# a no-op stand-in so `spaces.GPU` can still be used as a decorator.
try:
    import spaces
    SPACES_AVAILABLE = True
except ImportError:
    SPACES_AVAILABLE = False

    class spaces:
        """No-op replacement for the `spaces` package."""

        @staticmethod
        def GPU(duration=60):
            """Return a decorator that leaves the decorated function unchanged.

            Works both as ``@spaces.GPU(duration=...)`` and as a bare
            ``@spaces.GPU``. In the bare form the decorated function arrives
            in the ``duration`` slot and is returned untouched.
            """
            if callable(duration):
                # Bare-decorator usage: `duration` is actually the function.
                return duration

            def decorator(func):
                return func

            return decorator
# Optional dependency: the DNA-Diffusion model wrapper. MODEL_AVAILABLE records
# whether the import succeeded so callers can fall back when it did not.
try:
    from dna_diffusion_model import DNADiffusionModel, get_model
except ImportError as e:
    MODEL_AVAILABLE = False
    logger.warning(f"DNA-Diffusion model not available: {e}")
else:
    MODEL_AVAILABLE = True
# File name of the enhanced HTML interface.
# NOTE(review): nothing in the visible part of this file reads it.
HTML_FILE = "enhanced-dna-interface.html"
# Codon table for translation (DNA codon -> one-letter amino acid, '*' = stop).
# Codons are enumerated with each position cycling through T, C, A, G, and the
# amino-acid string lists the matching residue for each codon in that order.
_CODON_BASES = "TCAG"
_CODON_AMINO_ACIDS = (
    "FFLLSSSSYY**CC*W"  # T..
    "LLLLPPPPHHQQRRRR"  # C..
    "IIIMTTTTNNKKSSRR"  # A..
    "VVVVAAAADDEEGGGG"  # G..
)
CODON_TABLE = dict(zip(
    (
        first + second + third
        for first in _CODON_BASES
        for second in _CODON_BASES
        for third in _CODON_BASES
    ),
    _CODON_AMINO_ACIDS,
))
# Common restriction enzymes: enzyme name -> recognition sequence.
RESTRICTION_ENZYMES = dict(
    EcoRI='GAATTC',
    BamHI='GGATCC',
    HindIII='AAGCTT',
    PstI='CTGCAG',
    SalI='GTCGAC',
    XbaI='TCTAGA',
    NotI='GCGGCCGC',
    XhoI='CTCGAG',
    NdeI='CATATG',
    NcoI='CCATGG',
)
@dataclass
class AnalysisResult:
    """Data class for storing the results of one sequence analysis.

    The ``@dataclass`` decorator generates the keyword-argument constructor
    that ``EnhancedDNAApp.analyze_sequence`` relies on.
    """

    # The analysed DNA sequence.
    sequence: str
    # GC content as a percentage (0-100).
    gc_content: float
    # Estimated melting temperature.
    melting_temp: float
    # Enzyme name -> list of 0-based match start positions.
    restriction_sites: Dict[str, List[int]]
    # (start, end, frame label) for each open reading frame found.
    orfs: List[Tuple[int, int, str]]
    # Primer pair description, or None when no suitable pair was found.
    primers: Optional[Dict[str, Any]]
    # Human-readable protein summary text.
    protein_analysis: str
class ScientificAnalyzer:
    """Enhanced scientific analysis tools.

    All helpers are stateless static methods, so they can be called on the
    class or on an instance (``EnhancedDNAApp`` keeps an instance around).
    Unless noted otherwise, sequences are expected to use upper-case A/C/G/T.
    """

    # Translation table used by reverse_complement (handles both cases;
    # any other character is passed through unchanged).
    _COMPLEMENT = str.maketrans('ACGTacgt', 'TGCAtgca')

    @staticmethod
    def calculate_gc_content(sequence: str) -> float:
        """Return the percentage (0-100) of G and C bases; 0.0 for an empty sequence."""
        if not sequence:
            return 0.0
        gc_count = sequence.count('G') + sequence.count('C')
        return (gc_count / len(sequence)) * 100

    @staticmethod
    def calculate_melting_temp(sequence: str) -> float:
        """Estimate the melting temperature of *sequence*.

        Sequences shorter than 14 bases use the Wallace rule
        (4 per G/C + 2 per A/T). Longer sequences use the GC-percentage
        formula ``81.5 + 0.41 * %GC - 675 / length``.
        """
        if len(sequence) < 14:
            # Wallace rule for short sequences
            gc = sequence.count('G') + sequence.count('C')
            at = sequence.count('A') + sequence.count('T')
            return 4 * gc + 2 * at
        gc_content = ScientificAnalyzer.calculate_gc_content(sequence)
        return 81.5 + 0.41 * gc_content - 675 / len(sequence)

    @staticmethod
    def find_restriction_sites(sequence: str) -> Dict[str, List[int]]:
        """Locate recognition sequences of the enzymes in RESTRICTION_ENZYMES.

        Returns a dict mapping enzyme name to the 0-based start positions of
        every (possibly overlapping) match. Enzymes with no match are omitted.
        """
        sites = {}
        for enzyme, pattern in RESTRICTION_ENZYMES.items():
            positions = []
            idx = sequence.find(pattern)
            while idx != -1:
                positions.append(idx)
                # Advance by one so overlapping occurrences are also reported.
                idx = sequence.find(pattern, idx + 1)
            if positions:
                sites[enzyme] = positions
        return sites

    @staticmethod
    def find_orfs(sequence: str, min_length: int = 100) -> List[Tuple[int, int, str]]:
        """Find open reading frames on the forward strand.

        Each of the three forward frames is scanned for an ATG followed by an
        in-frame stop codon (TAA/TAG/TGA). An ORF is reported as
        ``(start, end, "Frame +N")`` where ``end`` is just past the stop codon.
        It is only reported when the distance from the start codon to the
        stop codon is at least *min_length* bases.
        """
        orfs = []
        start_codon = 'ATG'
        stop_codons = ('TAA', 'TAG', 'TGA')
        codon_limit = len(sequence) - 2  # exclusive bound for a codon start index
        for frame in range(3):
            i = frame
            while i < codon_limit:
                if sequence[i:i + 3] == start_codon:
                    # Found start codon, look for the first in-frame stop
                    for j in range(i + 3, codon_limit, 3):
                        if sequence[j:j + 3] in stop_codons:
                            if j - i >= min_length:
                                orfs.append((i, j + 3, f"Frame +{frame + 1}"))
                            # Resume scanning after this stop codon.
                            i = j
                            break
                i += 3
        return orfs

    @staticmethod
    def design_primers(sequence: str, product_size: int = 500,
                       primer_length: int = 20) -> Optional[Dict[str, Any]]:
        """Pick the first primer pair whose melting temperatures differ by < 5.

        Candidate amplicons of *product_size* bases are tried every 100 bases.
        The forward primer is the first *primer_length* bases of the amplicon.
        The reverse primer is the reverse complement of its last
        *primer_length* bases.

        Returns a dict with keys ``forward``, ``reverse``, ``forward_tm``,
        ``reverse_tm``, ``product_size`` and ``position``, or ``None`` when no
        candidate qualifies (including when the sequence is shorter than
        *product_size*).
        """
        # "+ 1" so that a sequence exactly product_size long is still a candidate.
        for start in range(0, len(sequence) - product_size + 1, 100):
            forward = sequence[start:start + primer_length]
            reverse_start = start + product_size - primer_length
            reverse = sequence[reverse_start:reverse_start + primer_length]
            reverse_comp = ScientificAnalyzer.reverse_complement(reverse)
            # Calculate primer properties
            forward_tm = ScientificAnalyzer.calculate_melting_temp(forward)
            reverse_tm = ScientificAnalyzer.calculate_melting_temp(reverse_comp)
            if abs(forward_tm - reverse_tm) < 5:  # Similar Tm
                return {
                    'forward': forward,
                    'reverse': reverse_comp,
                    'forward_tm': forward_tm,
                    'reverse_tm': reverse_tm,
                    'product_size': product_size,
                    'position': start,
                }
        return None

    @staticmethod
    def reverse_complement(sequence: str) -> str:
        """Return the reverse complement of a DNA sequence.

        A/C/G/T are complemented in either case; other characters are kept.
        """
        return sequence.translate(ScientificAnalyzer._COMPLEMENT)[::-1]

    @staticmethod
    def codon_optimize(protein_sequence: str, organism: str = "E.coli") -> str:
        """Back-translate a protein using one preferred codon per amino acid.

        Simplified: the same E. coli table is used regardless of *organism*.
        Residues without an entry (e.g. '*' or 'X') are skipped.
        """
        ecoli_preferred_codons = {
            'F': 'TTT', 'L': 'CTG', 'S': 'TCT', 'Y': 'TAT',
            'C': 'TGC', 'W': 'TGG', 'P': 'CCG', 'H': 'CAT',
            'Q': 'CAG', 'R': 'CGT', 'I': 'ATT', 'M': 'ATG',
            'T': 'ACC', 'N': 'AAC', 'K': 'AAA', 'V': 'GTT',
            'A': 'GCT', 'D': 'GAT', 'E': 'GAA', 'G': 'GGT',
        }
        return ''.join(
            ecoli_preferred_codons[aa]
            for aa in protein_sequence
            if aa in ecoli_preferred_codons
        )
class ProteinStructurePredictor:
    """Mock 3D protein structure prediction.

    The methods are static placeholders: no external API is called and the
    results are simple heuristics (plus a random confidence value).
    """

    @staticmethod
    async def predict_structure(protein_sequence: str) -> Dict[str, Any]:
        """Return a mock structure prediction for *protein_sequence*.

        The result has keys ``confidence`` (random float in [70, 95)),
        ``secondary_structure`` (one B/L/H letter per residue), ``domains``
        (list of domain dicts) and ``pdb_data`` (always ``None`` here).
        """
        return {
            'confidence': np.random.uniform(70, 95),
            'secondary_structure': ProteinStructurePredictor._predict_secondary_structure(protein_sequence),
            'domains': ProteinStructurePredictor._predict_domains(protein_sequence),
            'pdb_data': None,  # Would contain actual 3D coordinates
        }

    @staticmethod
    def _predict_secondary_structure(sequence: str) -> str:
        """Map each residue to 'B' (V/I/L/M/F/Y/W), 'L' (D/E/K/R) or 'H' (anything else)."""
        structure = []
        for aa in sequence:
            if aa in 'VILMFYW':  # Hydrophobic - likely beta sheet
                structure.append('B')
            elif aa in 'DEKR':  # Charged - likely loop
                structure.append('L')
            else:  # Mixed - likely helix
                structure.append('H')
        return ''.join(structure)

    @staticmethod
    def _predict_domains(sequence: str) -> List[Dict[str, Any]]:
        """Return mock domain annotations.

        A single "Zinc finger domain" entry is reported when the sequence
        contains a Cys-x-x-Cys motif (two cysteines separated by any two
        residues) or when more than 10% of its residues are cysteine.
        The reported span is a placeholder: it starts at 0 and ends at 30,
        clamped to the sequence length.
        """
        domains = []
        # "CXXC" is motif notation (X = any residue), so match positions i and
        # i + 3 rather than searching for the literal substring.
        has_cxxc = any(
            sequence[i] == 'C' and sequence[i + 3] == 'C'
            for i in range(len(sequence) - 3)
        )
        if has_cxxc or sequence.count('C') > len(sequence) * 0.1:
            domains.append({
                'name': 'Zinc finger domain',
                'start': 0,
                'end': min(30, len(sequence)),
                'confidence': 85,
            })
        return domains
class LLMChatAssistant:
    """LLM-powered scientific chat assistant.

    Reads the API token from the ``FRIENDLI_TOKEN`` environment variable and
    keeps a local list of the exchanged messages. That history is recorded
    only; it is not sent with later requests.
    """

    # Upper bound (seconds) for one complete API round trip.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        self.api_token = os.getenv("FRIENDLI_TOKEN")
        self.conversation_history = []

    async def chat(self, message: str, context: Dict[str, Any], language: str = "en") -> str:
        """Answer *message* using the current analysis *context*.

        Returns the assistant's reply. When no token is configured or the
        request fails, returns a human-readable error string instead of
        raising.
        """
        if not self.api_token:
            return "Chat unavailable: API token not configured"
        try:
            # Prepare context-aware prompt
            system_prompt = self._build_system_prompt(language)
            user_prompt = self._build_user_prompt(message, context, language)
            # Make API call
            response = await self._call_llm_api(system_prompt, user_prompt)
        except Exception as e:
            # Boundary handler: the UI expects a string, never an exception.
            logger.error(f"Chat error: {e}")
            return f"Chat error: {str(e)}"
        # Record the exchange only after it succeeded, so the history never
        # contains a user message without its matching reply.
        self.conversation_history.append({"role": "user", "content": message})
        self.conversation_history.append({"role": "assistant", "content": response})
        return response

    def _build_system_prompt(self, language: str) -> str:
        """Return the system prompt: Korean for ``"ko"``, English otherwise."""
        if language == "ko":
            return (
                "당신은 분자생물학 전문가 AI 어시스턴트입니다.\n"
                "DNA 시퀀스 분석, 단백질 구조 예측, 실험 설계, 프라이머 디자인 등을 도와드립니다.\n"
                "과학적으로 정확하면서도 이해하기 쉽게 설명해드립니다."
            )
        return (
            "You are an expert molecular biology AI assistant.\n"
            "You help with DNA sequence analysis, protein structure prediction, "
            "experiment design, primer design, and more.\n"
            "Provide scientifically accurate yet easy to understand explanations."
        )

    def _build_user_prompt(self, message: str, context: Dict[str, Any], language: str) -> str:
        """Prefix the user's question with a short summary of *context*.

        Uses the ``sequence`` (first 50 characters), ``cell_type``,
        ``gc_content`` and ``restriction_sites`` entries; missing entries fall
        back to placeholders. *language* is currently unused.
        """
        # `or` also covers an explicit None value, which cannot be sliced.
        sequence_preview = (context.get('sequence') or 'None')[:50]
        site_count = len(context.get('restriction_sites') or {})
        context_info = (
            "\n"
            f"Current sequence: {sequence_preview}...\n"
            f"Cell type: {context.get('cell_type', 'Unknown')}\n"
            f"GC content: {context.get('gc_content', 'N/A')}%\n"
            f"Restriction sites found: {site_count}\n"
        )
        return f"{context_info}\n\nUser question: {message}"

    async def _call_llm_api(self, system_prompt: str, user_prompt: str) -> str:
        """POST the prompts to the chat-completions endpoint and return the reply text.

        Raises ``aiohttp.ClientResponseError`` on a non-2xx status and
        ``asyncio.TimeoutError`` when the request exceeds
        ``REQUEST_TIMEOUT_SECONDS``. ``chat`` converts both into an error
        string.
        """
        url = "https://api.friendli.ai/dedicated/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "dep89a2fld32mcm",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "max_tokens": 500,
            "temperature": 0.7,
        }
        timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(url, json=payload, headers=headers) as response:
                # Surface HTTP errors directly instead of failing later with a
                # confusing KeyError on the error payload.
                response.raise_for_status()
                result = await response.json()
                return result['choices'][0]['message']['content']
| class EnhancedDNAApp: | |
| """Main application class with enhanced features""" | |
| def __init__(self): | |
| self.model = None | |
| self.model_loading = False | |
| self.model_error = None | |
| self.analyzer = ScientificAnalyzer() | |
| self.structure_predictor = ProteinStructurePredictor() | |
| self.chat_assistant = LLMChatAssistant() | |
| self.current_analysis = None | |
| def initialize_model(self): | |
| """Initialize the DNA-Diffusion model""" | |
| if not MODEL_AVAILABLE: | |
| self.model_error = "DNA-Diffusion model module not available" | |
| return | |
| if self.model_loading: | |
| return | |
| self.model_loading = True | |
| try: | |
| logger.info("Starting model initialization...") | |
| self.model = get_model() | |
| logger.info("Model initialized successfully!") | |
| self.model_error = None | |
| except Exception as e: | |
| logger.error(f"Failed to initialize model: {e}") | |
| self.model_error = str(e) | |
| self.model = None | |
| finally: | |
| self.model_loading = False | |
| def generate_and_analyze(self, cell_type: str, guidance_scale: float = 1.0, language: str = "en"): | |
| """Generate sequence and perform comprehensive analysis""" | |
| try: | |
| # Generate sequence | |
| if MODEL_AVAILABLE and self.model: | |
| result = self.model.generate(cell_type, guidance_scale) | |
| sequence = result['sequence'] | |
| else: | |
| # Mock generation | |
| import random | |
| sequence = ''.join(random.choice(['A', 'T', 'C', 'G']) for _ in range(200)) | |
| # Perform comprehensive analysis | |
| analysis = self.analyze_sequence(sequence, cell_type) | |
| # Store current analysis for chat context | |
| self.current_analysis = { | |
| 'sequence': sequence, | |
| 'cell_type': cell_type, | |
| 'gc_content': analysis.gc_content, | |
| 'restriction_sites': analysis.restriction_sites, | |
| 'orfs': analysis.orfs, | |
| 'primers': analysis.primers | |
| } | |
| return json.dumps({ | |
| 'sequence': sequence, | |
| 'analysis': { | |
| 'gc_content': analysis.gc_content, | |
| 'melting_temp': analysis.melting_temp, | |
| 'restriction_sites': analysis.restriction_sites, | |
| 'orfs': analysis.orfs, | |
| 'primers': analysis.primers, | |
| 'protein_analysis': analysis.protein_analysis | |
| } | |
| }) | |
| except Exception as e: | |
| logger.error(f"Generation failed: {e}") | |
| return json.dumps({"error": str(e)}) | |
| def analyze_sequence(self, sequence: str, cell_type: str) -> AnalysisResult: | |
| """Perform comprehensive sequence analysis""" | |
| # Basic analysis | |
| gc_content = self.analyzer.calculate_gc_content(sequence) | |
| melting_temp = self.analyzer.calculate_melting_temp(sequence) | |
| restriction_sites = self.analyzer.find_restriction_sites(sequence) | |
| orfs = self.analyzer.find_orfs(sequence) | |
| # Primer design | |
| primers = self.analyzer.design_primers(sequence) | |
| # Protein analysis | |
| protein_seq = self.translate_to_protein(sequence) | |
| protein_analysis = self.analyze_protein_basic(protein_seq) | |
| return AnalysisResult( | |
| sequence=sequence, | |
| gc_content=gc_content, | |
| melting_temp=melting_temp, | |
| restriction_sites=restriction_sites, | |
| orfs=orfs, | |
| primers=primers, | |
| protein_analysis=protein_analysis | |
| ) | |
| def translate_to_protein(self, dna_sequence: str) -> str: | |
| """Translate DNA to protein""" | |
| protein = [] | |
| for i in range(0, len(dna_sequence) - 2, 3): | |
| codon = dna_sequence[i:i+3] | |
| if len(codon) == 3: | |
| aa = CODON_TABLE.get(codon, 'X') | |
| if aa == '*': | |
| break | |
| protein.append(aa) | |
| return ''.join(protein) | |
| def analyze_protein_basic(self, protein_sequence: str) -> str: | |
| """Basic protein analysis""" | |
| if not protein_sequence: | |
| return "No protein sequence generated" | |
| # Calculate basic properties | |
| length = len(protein_sequence) | |
| molecular_weight = sum(self.get_aa_weight(aa) for aa in protein_sequence) | |
| # Count amino acid types | |
| hydrophobic = sum(1 for aa in protein_sequence if aa in 'AILMFVPW') | |
| charged = sum(1 for aa in protein_sequence if aa in 'DEKR') | |
| analysis = f""" | |
| Protein length: {length} amino acids | |
| Molecular weight: ~{molecular_weight:.1f} Da | |
| Hydrophobic residues: {hydrophobic} ({hydrophobic/length*100:.1f}%) | |
| Charged residues: {charged} ({charged/length*100:.1f}%) | |
| """ | |
| return analysis | |
| def get_aa_weight(self, aa: str) -> float: | |
| """Get amino acid molecular weight""" | |
| weights = { | |
| 'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2, | |
| 'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2, | |
| 'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1, | |
| 'S': 105.1, 'T': 119.1, 'W': 204.2, 'Y': 181.2, 'V': 117.1 | |
| } | |
| return weights.get(aa, 100) | |
| async def handle_chat(self, message: str, language: str = "en") -> str: | |
| """Handle chat messages""" | |
| if not self.current_analysis: | |
| return "Please generate a sequence first to get context-aware assistance." | |
| response = await self.chat_assistant.chat(message, self.current_analysis, language) | |
| return response | |
| def export_results(self, format_type: str) -> str: | |
| """Export analysis results in various formats""" | |
| if not self.current_analysis: | |
| return "No analysis to export" | |
| if format_type == "genbank": | |
| return self._export_genbank() | |
| elif format_type == "fasta": | |
| return self._export_fasta() | |
| elif format_type == "json": | |
| return json.dumps(self.current_analysis, indent=2) | |
| else: | |
| return "Unsupported format" | |
| def _export_fasta(self) -> str: | |
| """Export in FASTA format""" | |
| header = f">DNA_Diffusion_{self.current_analysis['cell_type']}_{datetime.now().strftime('%Y%m%d')}" | |
| return f"{header}\n{self.current_analysis['sequence']}" | |
| def _export_genbank(self) -> str: | |
| """Export in GenBank format""" | |
| # Simplified GenBank format | |
| return f"""LOCUS DNA_Diffusion {len(self.current_analysis['sequence'])} bp DNA linear SYN {datetime.now().strftime('%d-%b-%Y')} | |
| DEFINITION Synthetic DNA sequence for {self.current_analysis['cell_type']} | |
| ORIGIN | |
| 1 {self.current_analysis['sequence']} | |
| //""" | |
# Single shared application instance; the Gradio callbacks below all use it.
app = EnhancedDNAApp()
def create_enhanced_demo():
    """Create the enhanced Gradio interface.

    Builds four tabs (Generate & Analyze, AI Assistant, Tools, Export), wires
    their controls to the shared ``app`` instance and returns the
    ``gr.Blocks`` object. Nothing is launched here.
    """
    with gr.Blocks(theme=gr.themes.Base()) as demo:
        gr.Markdown("# 🧬 Enhanced DNA-Diffusion with Scientific Tools")
        with gr.Tabs():
            with gr.TabItem("🎰 Generate & Analyze"):
                with gr.Row():
                    with gr.Column(scale=2):
                        # Generation controls
                        cell_type = gr.Radio(
                            ["K562", "GM12878", "HepG2"],
                            value="K562",
                            label="Cell Type"
                        )
                        guidance_scale = gr.Slider(
                            minimum=1.0,
                            maximum=10.0,
                            value=1.0,
                            step=0.5,
                            label="Guidance Scale"
                        )
                        language = gr.Radio(
                            ["en", "ko"],
                            value="en",
                            label="Language"
                        )
                        generate_btn = gr.Button("🎲 Generate & Analyze", variant="primary")
                    with gr.Column(scale=3):
                        # Results display (hidden; feeds the visualisations)
                        results_json = gr.JSON(label="Analysis Results", visible=False)
                        # Visual results
                        with gr.Accordion("📊 Sequence Analysis", open=True):
                            gc_plot = gr.Plot(label="GC Content Distribution")
                            restriction_map = gr.Plot(label="Restriction Enzyme Map")
                        with gr.Accordion("🧬 Protein Analysis", open=True):
                            protein_structure = gr.HTML(label="Predicted Structure")
                            protein_properties = gr.Textbox(label="Properties", lines=5)
            with gr.TabItem("💬 AI Assistant"):
                chatbot = gr.Chatbot(label="Scientific Assistant", height=400)
                msg = gr.Textbox(label="Ask about your sequence", placeholder="e.g., 'What primers would you recommend?'")
                chat_btn = gr.Button("Send")
                # Chat examples
                gr.Examples(
                    examples=[
                        "What restriction enzymes should I use for cloning?",
                        "Can you explain the ORFs found in this sequence?",
                        "How can I optimize this sequence for E. coli expression?",
                        "What's the predicted protein structure?"
                    ],
                    inputs=msg
                )
            with gr.TabItem("🔧 Tools"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Primer Design")
                        # NOTE(review): the primer-length slider is displayed but
                        # not passed on; design_primers is called with the
                        # product size only.
                        primer_length = gr.Slider(18, 25, 20, label="Primer Length")
                        product_size = gr.Slider(200, 1000, 500, label="Product Size")
                        design_primers_btn = gr.Button("Design Primers")
                        primer_results = gr.JSON(label="Designed Primers")
                    with gr.Column():
                        gr.Markdown("### Codon Optimization")
                        target_organism = gr.Dropdown(
                            ["E. coli", "Yeast", "Human", "Mouse"],
                            value="E. coli",
                            label="Target Organism"
                        )
                        optimize_btn = gr.Button("Optimize Codons")
                        optimized_seq = gr.Textbox(label="Optimized Sequence", lines=5)
            with gr.TabItem("📤 Export"):
                export_format = gr.Radio(
                    ["FASTA", "GenBank", "JSON"],
                    value="FASTA",
                    label="Export Format"
                )
                export_btn = gr.Button("Export Results")
                export_output = gr.Textbox(label="Exported Data", lines=10)

        # Wire up the interface: generate first, then draw the visualisations.
        generate_btn.click(
            fn=app.generate_and_analyze,
            inputs=[cell_type, guidance_scale, language],
            outputs=[results_json]
        ).then(
            fn=visualize_results,
            inputs=[results_json],
            outputs=[gc_plot, restriction_map, protein_structure, protein_properties]
        )

        # Chat functionality. An async handler lets Gradio await the chat
        # coroutine directly instead of creating a new event loop per message.
        async def respond(message, chat_history, language):
            chat_history = chat_history or []
            response = await app.handle_chat(message, language)
            chat_history.append((message, response))
            return "", chat_history

        msg.submit(respond, [msg, chatbot, language], [msg, chatbot])
        chat_btn.click(respond, [msg, chatbot, language], [msg, chatbot])

        # Tools: primer design on the most recently generated sequence.
        def run_primer_design(size):
            if not app.current_analysis:
                return {"error": "Please generate a sequence first."}
            # Slider values may arrive as floats; design_primers needs an int.
            primers = ScientificAnalyzer.design_primers(
                app.current_analysis['sequence'], int(size)
            )
            return primers or {"error": "No suitable primer pair found."}

        design_primers_btn.click(
            fn=run_primer_design,
            inputs=[product_size],
            outputs=[primer_results]
        )

        # Tools: translate the current sequence, then pick preferred codons.
        def run_codon_optimization(organism):
            if not app.current_analysis:
                return "Please generate a sequence first."
            protein = app.translate_to_protein(app.current_analysis['sequence'])
            return ScientificAnalyzer.codon_optimize(protein, organism)

        optimize_btn.click(
            fn=run_codon_optimization,
            inputs=[target_organism],
            outputs=[optimized_seq]
        )

        # Export functionality
        export_btn.click(
            fn=lambda fmt: app.export_results(fmt.lower()),
            inputs=[export_format],
            outputs=[export_output]
        )

        # Initialize model on load
        demo.load(fn=app.initialize_model)
    return demo
def visualize_results(results_json):
    """Create visualizations from analysis results.

    *results_json* is the output of ``generate_and_analyze``, either as a JSON
    string or as an already-parsed dict.

    Returns a 4-tuple ``(gc_figure, restriction_figure, structure_html,
    properties_text)``. When the input is missing, is not a dict, or carries
    an ``"error"`` key, returns ``(None, None, "<p>Error in analysis</p>",
    "Error")``.
    """
    data = json.loads(results_json) if isinstance(results_json, str) else results_json
    if not isinstance(data, dict) or "error" in data:
        return None, None, "<p>Error in analysis</p>", "Error"

    # Imported after the error check so the error path needs no plotting
    # backend. Figures are built with the object-oriented API rather than
    # pyplot: pyplot keeps every figure alive in a global registry until it
    # is closed, which leaks memory when this runs once per request.
    from matplotlib.figure import Figure

    analysis = data.get('analysis', {})

    # GC content plot
    fig1 = Figure(figsize=(8, 4))
    ax1 = fig1.subplots()
    gc_content = analysis.get('gc_content', 0)
    ax1.bar(['GC%', 'AT%'], [gc_content, 100 - gc_content], color=['#00ff00', '#ff0000'])
    ax1.set_ylabel('Percentage')
    ax1.set_title('Nucleotide Composition')

    # Restriction map: one row per enzyme, one tick + label per site.
    fig2 = Figure(figsize=(10, 3))
    ax2 = fig2.subplots()
    sites = analysis.get('restriction_sites', {})
    seq_len = len(data.get('sequence', ''))
    y_pos = 0
    for enzyme, positions in sites.items():
        for pos in positions:
            ax2.plot([pos, pos], [y_pos - 0.1, y_pos + 0.1], 'r-', linewidth=2)
            ax2.text(pos, y_pos + 0.15, enzyme, fontsize=8, ha='center')
        y_pos += 0.3
    ax2.set_xlim(0, seq_len)
    ax2.set_ylim(-0.5, max(0.5, y_pos))
    ax2.set_xlabel('Position (bp)')
    ax2.set_title('Restriction Enzyme Sites')

    # Protein structure (mock visualization with fixed percentages).
    # Gradient stops are cumulative positions (0-45-75-100) so the three
    # bands get widths of 45%, 30% and 25%.
    structure_html = """
<div style="padding: 20px; background: #f0f0f0; border-radius: 10px;">
<h3>🔬 Predicted Secondary Structure</h3>
<p>Helices: 45%, Beta sheets: 30%, Loops: 25%</p>
<div style="background: linear-gradient(to right, #ff0000 0%, #ff0000 45%, #00ff00 45%, #00ff00 75%, #0000ff 75%, #0000ff 100%);
height: 30px; border-radius: 5px; margin: 10px 0;"></div>
<p style="color: #666;">3D structure prediction available in Pro version</p>
</div>
"""
    # Protein properties
    properties = analysis.get('protein_analysis', 'No analysis available')
    return fig1, fig2, structure_html, properties
# Script entry point: build the interface and serve it with a public share
# link requested.
if __name__ == "__main__":
    demo = create_enhanced_demo()
    demo.launch(share=True)