DNA-Diffusion

Running

App Files Files Community

openfree committed on Jul 2, 2025

Commit

3499851

verified ·

1 Parent(s): b5e52cd

Update app.py

Browse files

Files changed (1) hide show

app.py +577 -341

app.py CHANGED Viewed

@@ -1,16 +1,21 @@
 """
-DNA-Diffusion Gradio Application
-Interactive DNA sequence generation with slot machine visualization and protein analysis
 """
 import gradio as gr
 import logging
 import json
 import os
-from typing import Dict, Any, Tuple
 import html
 import requests
 import time
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
@@ -22,7 +27,6 @@ try:
     SPACES_AVAILABLE = True
 except ImportError:
     SPACES_AVAILABLE = False
-    # Create a dummy decorator if spaces is not available
     class spaces:
         @staticmethod
         def GPU(duration=60):
@@ -30,162 +34,309 @@ except ImportError:
                 return func
             return decorator
-# Try to import model, but allow app to run without it for UI development
 try:
     from dna_diffusion_model import DNADiffusionModel, get_model
     MODEL_AVAILABLE = True
-    logger.info("DNA-Diffusion model module loaded successfully")
 except ImportError as e:
     logger.warning(f"DNA-Diffusion model not available: {e}")
     MODEL_AVAILABLE = False
-# Load the HTML interface
-HTML_FILE = "dna-slot-machine.html"
-if not os.path.exists(HTML_FILE):
-    raise FileNotFoundError(f"HTML interface file '{HTML_FILE}' not found. Please ensure it exists in the same directory as app.py")
-with open(HTML_FILE, "r") as f:
-    SLOT_MACHINE_HTML = f.read()
-class ProteinAnalyzer:
-    """Handles protein translation and analysis using LLM"""
-    # Genetic code table for DNA to amino acid translation
-    CODON_TABLE = {
-        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
-        'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
-        'TAT': 'Y', 'TAC': 'Y', 'TAA': '*', 'TAG': '*',
-        'TGT': 'C', 'TGC': 'C', 'TGA': '*', 'TGG': 'W',
-        'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
-        'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
-        'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
-        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
-        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
-        'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
-        'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
-        'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
-        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
-        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
-        'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
-        'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
-    }
     @staticmethod
-    def dna_to_protein(dna_sequence: str) -> str:
-        """Translate DNA sequence to protein sequence"""
-        # Ensure sequence is uppercase
-        dna_sequence = dna_sequence.upper()
-        # Remove any non-DNA characters
-        dna_sequence = ''.join(c for c in dna_sequence if c in 'ATCG')
-        # Translate to protein
-        protein = []
-        for i in range(0, len(dna_sequence) - 2, 3):
-            codon = dna_sequence[i:i+3]
-            if len(codon) == 3:
-                amino_acid = ProteinAnalyzer.CODON_TABLE.get(codon, 'X')
-                if amino_acid == '*':  # Stop codon
-                    break
-                protein.append(amino_acid)
-        return ''.join(protein)
     @staticmethod
-    def analyze_protein_with_llm(protein_sequence: str, cell_type: str, language: str = "en") -> str:
-        """Analyze protein structure and function using Friendli LLM API"""
-        # Get API token from environment
-        token = os.getenv("FRIENDLI_TOKEN")
-        if not token:
-            logger.warning("FRIENDLI_TOKEN not found in environment variables")
-            if language == "ko":
-                return "단백질 분석 불가: API 토큰이 설정되지 않았습니다"
-            return "Protein analysis unavailable: API token not configured"
-        try:
-            url = "https://api.friendli.ai/dedicated/v1/chat/completions"
-            headers = {
-                "Authorization": f"Bearer {token}",
-                "Content-Type": "application/json"
-            }
-            # Create prompt for protein analysis based on language
-            if language == "ko":
-                prompt = f"""당신은 생물정보학 전문가입니다. 다음 단백질 서열을 분석하고 잠재적인 구조와 기능에 대한 통찰력을 제공해주세요.
-단백질 서열: {protein_sequence}
-세포 유형: {cell_type}
-다음 내용을 포함해주세요:
-1. 서열 패턴을 기반으로 예측되는 단백질 패밀리 또는 도메인
-2. 잠재적인 구조적 특징 (알파 나선, 베타 시트, 루프)
-3. 가능한 생물학적 기능
-4. {cell_type} 세포 유형과의 관련성
-5. 주목할 만한 서열 모티프나 특성
-과학 애플리케이션에 표시하기에 적합하도록 간결하면서도 유익한 응답을 작성해주세요."""
-            else:
-                prompt = f"""You are a bioinformatics expert. Analyze the following protein sequence and provide insights about its potential structure and function.
-Protein sequence: {protein_sequence}
-Cell type context: {cell_type}
-Please provide:
-1. Predicted protein family or domain based on sequence patterns
-2. Potential structural features (alpha helices, beta sheets, loops)
-3. Possible biological functions
-4. Relevance to the {cell_type} cell type
-5. Any notable sequence motifs or characteristics
-Keep the response concise but informative, suitable for display in a scientific application."""
-            payload = {
-                "model": "dep89a2fld32mcm",
-                "messages": [
-                    {
-                        "role": "system",
-                        "content": "You are a knowledgeable bioinformatics assistant specializing in protein structure and function prediction." if language == "en" else "당신은 단백질 구조와 기능 예측을 전문으로 하는 지식이 풍부한 생물정보학 어시스턴트입니다."
-                    },
-                    {
-                        "role": "user",
-                        "content": prompt
-                    }
-                ],
-                "max_tokens": 1000,
-                "temperature": 0.7,
-                "top_p": 0.8,
-                "stream": False  # Disable streaming for simplicity
-            }
-            response = requests.post(url, json=payload, headers=headers, timeout=30)
-            response.raise_for_status()
-            result = response.json()
-            analysis = result['choices'][0]['message']['content']
-            return analysis
-        except requests.exceptions.RequestException as e:
-            logger.error(f"Failed to analyze protein with LLM: {e}")
-            return f"Protein analysis failed: {str(e)}"
         except Exception as e:
-            logger.error(f"Unexpected error during protein analysis: {e}")
-            return "Protein analysis unavailable due to an error"
-class DNADiffusionApp:
-    """Main application class for DNA-Diffusion Gradio interface"""
     def __init__(self):
         self.model = None
         self.model_loading = False
         self.model_error = None
-        self.protein_analyzer = ProteinAnalyzer()
     def initialize_model(self):
         """Initialize the DNA-Diffusion model"""
         if not MODEL_AVAILABLE:
-            self.model_error = "DNA-Diffusion model module not available. Please install dependencies."
             return
         if self.model_loading:
@@ -205,246 +356,331 @@ class DNADiffusionApp:
             self.model_loading = False
     @spaces.GPU(duration=60)
-    def generate_sequence(self, cell_type: str, guidance_scale: float = 1.0) -> Tuple[str, Dict[str, Any]]:
-        """Generate a DNA sequence using the model or mock data"""
-        # Use mock generation if model is not available
-        if not MODEL_AVAILABLE or self.model is None:
-            logger.warning("Using mock sequence generation")
-            import random
-            sequence = ''.join(random.choice(['A', 'T', 'C', 'G']) for _ in range(200))
-            metadata = {
-                'cell_type': cell_type,
-                'guidance_scale': guidance_scale,
-                'generation_time': 2.0,
-                'mock': True
-            }
-            # Simulate generation time
-            time.sleep(2.0)
-            return sequence, metadata
-        # Use real model
-        try:
-            result = self.model.generate(cell_type, guidance_scale)
-            return result['sequence'], result['metadata']
-        except Exception as e:
-            logger.error(f"Generation failed: {e}")
-            raise
-    def handle_generation_request(self, cell_type: str, guidance_scale: float, language: str = "en"):
-        """Handle sequence generation request from Gradio"""
         try:
-            logger.info(f"Generating sequence for cell type: {cell_type}, language: {language}")
-            # Generate DNA sequence
-            sequence, metadata = self.generate_sequence(cell_type, guidance_scale)
-            # Translate to protein
-            logger.info("Translating DNA to protein sequence...")
-            protein_sequence = self.protein_analyzer.dna_to_protein(sequence)
-            # Add protein sequence to metadata
-            metadata['protein_sequence'] = protein_sequence
-            metadata['protein_length'] = len(protein_sequence)
-            # Analyze protein with LLM
-            logger.info("Analyzing protein structure and function...")
-            protein_analysis = self.protein_analyzer.analyze_protein_with_llm(
-                protein_sequence, cell_type, language
-            )
-            # Add analysis to metadata
-            metadata['protein_analysis'] = protein_analysis
-            logger.info("Generation and analysis complete")
-            return sequence, json.dumps(metadata)
         except Exception as e:
-            error_msg = str(e)
-            logger.error(f"Generation request failed: {error_msg}")
-            return "", json.dumps({"error": error_msg})
 # Create single app instance
-app = DNADiffusionApp()
-def create_demo():
-    """Create the Gradio demo interface"""
-    # CSS to hide backend controls and prevent scrolling
-    css = """
-    #hidden-controls { display: none !important; }
-    .gradio-container {
-        overflow: hidden;
-        background-color: #000000 !important;
-    }
-    #dna-frame { overflow: hidden; position: relative; }
-    body {
-        background-color: #000000 !important;
-    }
-    """
-    # JavaScript for handling communication between iframe and Gradio
-    js = """
-    function() {
-        console.log('Initializing DNA-Diffusion Gradio interface...');
-        // Set up message listener to receive requests from iframe
-        window.addEventListener('message', function(event) {
-            console.log('Parent received message:', event.data);
-            if (event.data.type === 'generate_request') {
-                console.log('Triggering generation for cell type:', event.data.cellType);
-                console.log('Language:', event.data.language);
-                // Update the hidden cell type input
-                const radioInputs = document.querySelectorAll('#cell-type-input input[type="radio"]');
-                radioInputs.forEach(input => {
-                    if (input.value === event.data.cellType) {
-                        input.checked = true;
-                        // Trigger change event
-                        input.dispatchEvent(new Event('change'));
-                    }
-                });
-                // Update the language input
-                const langInputs = document.querySelectorAll('#language-input input[type="radio"]');
-                langInputs.forEach(input => {
-                    if (input.value === event.data.language) {
-                        input.checked = true;
-                        input.dispatchEvent(new Event('change'));
-                    }
-                });
-                // Small delay to ensure radio button update is processed
-                setTimeout(() => {
-                    document.querySelector('#generate-btn').click();
-                }, 100);
-            }
-        });
-        // Function to send sequence to iframe
-        window.sendSequenceToIframe = function(sequence, metadata) {
-            console.log('Sending sequence to iframe:', sequence);
-            const iframe = document.querySelector('#dna-frame iframe');
-            if (iframe && iframe.contentWindow) {
-                try {
-                    const meta = JSON.parse(metadata);
-                    if (meta.error) {
-                        iframe.contentWindow.postMessage({
-                            type: 'generation_error',
-                            error: meta.error
-                        }, '*');
-                    } else {
-                        iframe.contentWindow.postMessage({
-                            type: 'sequence_generated',
-                            sequence: sequence,
-                            metadata: meta
-                        }, '*');
-                    }
-                } catch (e) {
-                    console.error('Failed to parse metadata:', e);
-                    // If parsing fails, still send the sequence
-                    iframe.contentWindow.postMessage({
-                        type: 'sequence_generated',
-                        sequence: sequence,
-                        metadata: {}
-                    }, '*');
-                }
-            } else {
-                console.error('Could not find iframe');
-            }
-        };
-    }
-    """
-    with gr.Blocks(css=css, js=js, theme=gr.themes.Base()) as demo:
-        # Hidden controls for backend processing
-        with gr.Column(elem_id="hidden-controls", visible=False):
-            cell_type_input = gr.Radio(
-                ["K562", "GM12878", "HepG2"],
-                value="K562",
-                label="Cell Type",
-                elem_id="cell-type-input"
-            )
-            language_input = gr.Radio(
-                ["en", "ko"],
-                value="en",
-                label="Language",
-                elem_id="language-input"
-            )
-            guidance_input = gr.Slider(
-                minimum=1.0,
-                maximum=10.0,
-                value=1.0,
-                step=0.5,
-                label="Guidance Scale",
-                elem_id="guidance-input"
-            )
-            generate_btn = gr.Button("Generate", elem_id="generate-btn")
-            sequence_output = gr.Textbox(label="Sequence", elem_id="sequence-output")
-            metadata_output = gr.Textbox(label="Metadata", elem_id="metadata-output")
-        # Main interface - the slot machine in an iframe
-        # Escape the HTML content for srcdoc
-        escaped_html = html.escape(SLOT_MACHINE_HTML, quote=True)
-        iframe_html = f'<iframe srcdoc="{escaped_html}" style="width: 100%; height: 800px; border: none; display: block;"></iframe>'
-        html_display = gr.HTML(
-            iframe_html,
-            elem_id="dna-frame"
-        )
-        # Wire up the generation
         generate_btn.click(
-            fn=app.handle_generation_request,
-            inputs=[cell_type_input, guidance_input, language_input],
-            outputs=[sequence_output, metadata_output]
         ).then(
-            fn=None,
-            inputs=[sequence_output, metadata_output],
-            outputs=None,
-            js="(seq, meta) => sendSequenceToIframe(seq, meta)"
         )
-        # Initialize model on load
-        demo.load(
-            fn=app.initialize_model,
-            inputs=None,
-            outputs=None
         )
     return demo
-# Launch the app
-if __name__ == "__main__":
-    demo = create_demo()
-    # Parse any command line arguments
-    import argparse
-    parser = argparse.ArgumentParser(description="DNA-Diffusion Gradio App")
-    parser.add_argument("--share", action="store_true", help="Create a public shareable link")
-    parser.add_argument("--port", type=int, default=7860, help="Port to run the app on")
-    parser.add_argument("--host", type=str, default="0.0.0.0", help="Host to run the app on")
-    args = parser.parse_args()
-    # For Hugging Face Spaces deployment
-    import os
-    if os.getenv("SPACE_ID"):
-        # Running on Hugging Face Spaces
-        args.host = "0.0.0.0"
-        args.port = 7860
-        args.share = False
-        inbrowser = False
     else:
-        inbrowser = True
-    logger.info(f"Starting DNA-Diffusion Gradio app on {args.host}:{args.port}")
-    demo.launch(
-        share=args.share,
-        server_name=args.host,
-        server_port=args.port,
-        inbrowser=inbrowser
-    )

 """
+Enhanced DNA-Diffusion Gradio Application
+With scientific tools, analysis features, and LLM chat integration
 """
 import gradio as gr
 import logging
 import json
 import os
+from typing import Dict, Any, Tuple, List
 import html
 import requests
 import time
+import numpy as np
+from dataclasses import dataclass
+from datetime import datetime
+import asyncio
+import aiohttp
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
     SPACES_AVAILABLE = True
 except ImportError:
     SPACES_AVAILABLE = False
     class spaces:
         @staticmethod
         def GPU(duration=60):
                 return func
             return decorator
+# Try to import model
 try:
     from dna_diffusion_model import DNADiffusionModel, get_model
     MODEL_AVAILABLE = True
 except ImportError as e:
     logger.warning(f"DNA-Diffusion model not available: {e}")
     MODEL_AVAILABLE = False
+# Load the enhanced HTML interface
+HTML_FILE = "enhanced-dna-interface.html"
+# Codon table for translation
+CODON_TABLE = {
+    'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
+    'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
+    'TAT': 'Y', 'TAC': 'Y', 'TAA': '*', 'TAG': '*',
+    'TGT': 'C', 'TGC': 'C', 'TGA': '*', 'TGG': 'W',
+    'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
+    'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
+    'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
+    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+    'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
+    'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
+    'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
+    'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
+    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+    'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
+    'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
+    'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
+}
+# Common restriction enzymes
+RESTRICTION_ENZYMES = {
+    'EcoRI': 'GAATTC',
+    'BamHI': 'GGATCC',
+    'HindIII': 'AAGCTT',
+    'PstI': 'CTGCAG',
+    'SalI': 'GTCGAC',
+    'XbaI': 'TCTAGA',
+    'NotI': 'GCGGCCGC',
+    'XhoI': 'CTCGAG',
+    'NdeI': 'CATATG',
+    'NcoI': 'CCATGG'
+}
@dataclass
class AnalysisResult:
    """Bundle of every metric computed for a single DNA sequence."""
    # The nucleotide string the remaining fields describe.
    sequence: str
    # GC share of the sequence, expressed as a percentage.
    gc_content: float
    # Estimated melting temperature of the sequence.
    melting_temp: float
    # Enzyme name -> list of offsets where its recognition pattern was found.
    restriction_sites: Dict[str, List[int]]
    # One (start, end, frame label) tuple per detected open reading frame.
    orfs: List[Tuple[int, int, str]]
    # Description of the selected primer pair.
    # NOTE(review): the primer designer in this file can return None, so this
    # field may hold None despite the annotation - confirm and widen the type.
    primers: Dict[str, Any]
    # Human-readable summary of the translated protein.
    protein_analysis: str
class ScientificAnalyzer:
    """Stateless helpers for basic DNA sequence analysis.

    Every method is static and works on plain strings. Matching is
    case-sensitive: only upper-case ``A``/``C``/``G``/``T`` are recognised.
    """

    @staticmethod
    def calculate_gc_content(sequence: str) -> float:
        """Return the percentage of G and C bases in *sequence* (0 if empty)."""
        if not sequence:
            return 0.0
        gc_count = sequence.count('G') + sequence.count('C')
        return (gc_count / len(sequence)) * 100

    @staticmethod
    def calculate_melting_temp(sequence: str) -> float:
        """Estimate the melting temperature of *sequence*.

        Sequences shorter than 14 bases use the Wallace rule
        (``4 * (G + C) + 2 * (A + T)``); longer ones use the GC-percentage
        formula ``81.5 + 0.41 * GC% - 675 / N``. This is not a
        nearest-neighbour calculation and applies no explicit salt term.
        """
        if len(sequence) < 14:
            strong = sequence.count('G') + sequence.count('C')
            weak = sequence.count('A') + sequence.count('T')
            # float() keeps the return type consistent with the long branch.
            return float(4 * strong + 2 * weak)
        gc_percent = ScientificAnalyzer.calculate_gc_content(sequence)
        return 81.5 + 0.41 * gc_percent - 675 / len(sequence)

    @staticmethod
    def find_restriction_sites(sequence: str) -> Dict[str, List[int]]:
        """Locate recognition sequences of the enzymes in RESTRICTION_ENZYMES.

        Returns:
            Mapping of enzyme name to the 0-based start offsets of every
            (possibly overlapping) occurrence of its recognition pattern.
            Enzymes without a match are omitted. The offsets are pattern
            starts, not the positions where the enzyme actually cuts.
        """
        sites = {}
        for enzyme, pattern in RESTRICTION_ENZYMES.items():
            last_start = len(sequence) - len(pattern)
            positions = [
                i for i in range(last_start + 1)
                if sequence.startswith(pattern, i)
            ]
            if positions:
                sites[enzyme] = positions
        return sites

    @staticmethod
    def find_orfs(sequence: str, min_length: int = 100) -> List[Tuple[int, int, str]]:
        """Find open reading frames on the forward strand.

        Each of the three forward frames is scanned for ``ATG`` followed by
        the first in-frame stop codon (``TAA``, ``TAG``, ``TGA``).

        Args:
            sequence: DNA string to scan.
            min_length: Minimum distance in bases from the start codon to the
                stop codon (stop codon itself excluded) for an ORF to be kept.

        Returns:
            ``(start, end, label)`` tuples where ``start`` is the offset of
            the ``ATG``, ``end`` is one past the stop codon and ``label`` is
            ``"Frame +1"``..``"Frame +3"``.
        """
        orfs = []
        start_codon = 'ATG'
        stop_codons = ('TAA', 'TAG', 'TGA')
        scan_limit = len(sequence) - 2
        for frame in range(3):
            i = frame
            while i < scan_limit:
                if sequence[i:i + 3] == start_codon:
                    for j in range(i + 3, scan_limit, 3):
                        if sequence[j:j + 3] in stop_codons:
                            if j - i >= min_length:
                                orfs.append((i, j + 3, f"Frame +{frame + 1}"))
                            # Resume scanning after this stop codon, whether
                            # or not the ORF was long enough to keep.
                            i = j
                            break
                i += 3
        return orfs

    @staticmethod
    def design_primers(sequence: str, product_size: int = 500) -> "Dict[str, Any] | None":
        """Pick the first primer pair whose melting temperatures are within 5.

        Candidate amplicons of ``product_size`` bases are tried every 100
        bases. The forward primer is the first 20 bases of the amplicon and
        the reverse primer is the reverse complement of its last 20 bases.

        Args:
            sequence: Template DNA.
            product_size: Desired amplicon length in bases.

        Returns:
            A dict with keys ``forward``, ``reverse``, ``forward_tm``,
            ``reverse_tm``, ``product_size`` and ``position``, or ``None``
            when the template is shorter than ``product_size`` or no
            candidate pair has similar melting temperatures.
        """
        primer_length = 20
        # "+ 1" so a template exactly product_size long still yields the
        # amplicon starting at offset 0.
        for start in range(0, len(sequence) - product_size + 1, 100):
            forward = sequence[start:start + primer_length]
            reverse_start = start + product_size - primer_length
            reverse_comp = ScientificAnalyzer.reverse_complement(
                sequence[reverse_start:reverse_start + primer_length]
            )
            forward_tm = ScientificAnalyzer.calculate_melting_temp(forward)
            reverse_tm = ScientificAnalyzer.calculate_melting_temp(reverse_comp)
            if abs(forward_tm - reverse_tm) < 5:  # Similar Tm
                # Only the first acceptable pair is ever used, so stop here.
                return {
                    'forward': forward,
                    'reverse': reverse_comp,
                    'forward_tm': forward_tm,
                    'reverse_tm': reverse_tm,
                    'product_size': product_size,
                    'position': start
                }
        return None

    @staticmethod
    def reverse_complement(sequence: str) -> str:
        """Return the reverse complement; non-ACGT characters pass through unchanged."""
        complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
        return ''.join(complement.get(base, base) for base in reversed(sequence))

    @staticmethod
    def codon_optimize(protein_sequence: str, organism: str = "E.coli") -> str:
        """Back-translate a protein using one fixed codon per amino acid.

        Args:
            protein_sequence: One-letter amino-acid string.
            organism: Accepted for interface compatibility; the single
                built-in table is used regardless of its value.

        Returns:
            DNA string. Residues missing from the table (for example ``X``
            or ``*``) are skipped silently.
        """
        # Simplified codon optimization - in reality would use organism-specific tables
        ecoli_preferred_codons = {
            'F': 'TTT', 'L': 'CTG', 'S': 'TCT', 'Y': 'TAT',
            'C': 'TGC', 'W': 'TGG', 'P': 'CCG', 'H': 'CAT',
            'Q': 'CAG', 'R': 'CGT', 'I': 'ATT', 'M': 'ATG',
            'T': 'ACC', 'N': 'AAC', 'K': 'AAA', 'V': 'GTT',
            'A': 'GCT', 'D': 'GAT', 'E': 'GAA', 'G': 'GGT'
        }
        return ''.join(
            ecoli_preferred_codons[aa]
            for aa in protein_sequence
            if aa in ecoli_preferred_codons
        )
class ProteinStructurePredictor:
    """Placeholder protein structure prediction built on simple heuristics.

    Nothing here calls an external service; the results are mock values
    derived from residue composition plus a random confidence score.
    """

    @staticmethod
    async def predict_structure(protein_sequence: str) -> Dict[str, Any]:
        """Return a mock structure prediction for *protein_sequence*.

        Returns:
            Dict with ``confidence`` (random float drawn from 70-95),
            ``secondary_structure`` (one letter per residue), ``domains``
            (list of domain dicts) and ``pdb_data`` (always ``None``).
        """
        return {
            # float() turns the NumPy scalar into a plain Python float.
            'confidence': float(np.random.uniform(70, 95)),
            'secondary_structure': ProteinStructurePredictor._predict_secondary_structure(protein_sequence),
            'domains': ProteinStructurePredictor._predict_domains(protein_sequence),
            'pdb_data': None  # Would contain actual 3D coordinates
        }

    @staticmethod
    def _predict_secondary_structure(sequence: str) -> str:
        """Label each residue ``B``, ``L`` or ``H`` from its chemistry alone.

        Residues in ``VILMFYW`` map to ``B`` (treated as beta sheet), those
        in ``DEKR`` to ``L`` (treated as loop), everything else to ``H``.
        """
        def label(aa: str) -> str:
            if aa in 'VILMFYW':  # Hydrophobic - likely beta sheet
                return 'B'
            if aa in 'DEKR':  # Charged - likely loop
                return 'L'
            return 'H'  # Mixed - likely helix

        return ''.join(label(aa) for aa in sequence)

    @staticmethod
    def _predict_domains(sequence: str) -> List[Dict[str, Any]]:
        """Report a mock zinc-finger domain for cysteine-rich sequences.

        A domain is reported when the sequence contains a C-x-x-C motif (two
        cysteines separated by any two residues) or when more than 10% of
        its residues are cysteine.

        Returns:
            Empty list, or a single-element list with keys ``name``,
            ``start``, ``end`` and ``confidence``. ``end`` is capped at the
            sequence length.
        """
        # Look for the motif itself rather than the literal letters "CXXC".
        has_cxxc_motif = any(
            sequence[i] == 'C' and sequence[i + 3] == 'C'
            for i in range(len(sequence) - 3)
        )
        cysteine_rich = sequence.count('C') > len(sequence) * 0.1
        if not (has_cxxc_motif or cysteine_rich):
            return []
        return [{
            'name': 'Zinc finger domain',
            'start': 0,
            'end': min(30, len(sequence)),
            'confidence': 85
        }]
class LLMChatAssistant:
    """LLM-powered scientific chat assistant.

    The API token is read from the ``FRIENDLI_TOKEN`` environment variable
    when the instance is created. Completed exchanges are kept in
    ``conversation_history``; the history is recorded only and is not sent
    back to the API.
    """

    def __init__(self):
        self.api_token = os.getenv("FRIENDLI_TOKEN")
        self.conversation_history = []

    async def chat(self, message: str, context: Dict[str, Any], language: str = "en") -> str:
        """Answer *message* using the current analysis as context.

        Args:
            message: The user's question.
            context: Analysis dict; the keys ``sequence``, ``cell_type``,
                ``gc_content`` and ``restriction_sites`` are read if present.
            language: ``"ko"`` selects the Korean system prompt; anything
                else selects the English one.

        Returns:
            The assistant's reply, or a human-readable error string. This
            method does not raise for API or network failures.
        """
        if not self.api_token:
            return "Chat unavailable: API token not configured"

        try:
            system_prompt = self._build_system_prompt(language)
            user_prompt = self._build_user_prompt(message, context, language)
            response = await self._call_llm_api(system_prompt, user_prompt)
        except Exception as e:
            logger.error(f"Chat error: {e}")
            return f"Chat error: {str(e)}"

        # Record the exchange only once it has succeeded, so a failed call
        # does not leave an unanswered user turn in the history.
        self.conversation_history.append({"role": "user", "content": message})
        self.conversation_history.append({"role": "assistant", "content": response})
        return response

    def _build_system_prompt(self, language: str) -> str:
        """Return the system prompt in Korean for ``"ko"``, otherwise English."""
        if language == "ko":
            return """당신은 분자생물학 전문가 AI 어시스턴트입니다.
            DNA 시퀀스 분석, 단백질 구조 예측, 실험 설계, 프라이머 디자인 등을 도와드립니다.
            과학적으로 정확하면서도 이해하기 쉽게 설명해드립니다."""
        else:
            return """You are an expert molecular biology AI assistant.
            You help with DNA sequence analysis, protein structure prediction, experiment design, primer design, and more.
            Provide scientifically accurate yet easy to understand explanations."""

    def _build_user_prompt(self, message: str, context: Dict[str, Any], language: str) -> str:
        """Prefix *message* with a short summary of the analysis context.

        Missing or ``None`` context values fall back to placeholders instead
        of raising. ``language`` is accepted for symmetry with the system
        prompt builder but does not affect the output.
        """
        # "or" also covers keys that are present but set to None.
        sequence_preview = str(context.get('sequence') or 'None')[:50]
        site_count = len(context.get('restriction_sites') or {})
        context_info = f"""
        Current sequence: {sequence_preview}...
        Cell type: {context.get('cell_type', 'Unknown')}
        GC content: {context.get('gc_content', 'N/A')}%
        Restriction sites found: {site_count}
        """
        return f"{context_info}\n\nUser question: {message}"

    async def _call_llm_api(self, system_prompt: str, user_prompt: str) -> str:
        """POST one system+user exchange to the completion endpoint.

        Returns:
            The content of the first choice in the response.

        Raises:
            aiohttp.ClientResponseError: on a 4xx/5xx HTTP status.
            asyncio.TimeoutError: if the request exceeds 30 seconds in total.
            KeyError, IndexError: if the response body lacks the expected
                ``choices[0].message.content`` structure.
        """
        url = "https://api.friendli.ai/dedicated/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": "dep89a2fld32mcm",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "max_tokens": 500,
            "temperature": 0.7
        }
        # Bound the whole request so a stalled connection cannot hang the chat.
        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(url, json=payload, headers=headers) as response:
                # Surface HTTP errors directly instead of failing later with
                # an opaque KeyError on the error body.
                response.raise_for_status()
                result = await response.json()
                return result['choices'][0]['message']['content']
+class EnhancedDNAApp:
+    """Main application class with enhanced features"""
     def __init__(self):
         self.model = None
         self.model_loading = False
         self.model_error = None
+        self.analyzer = ScientificAnalyzer()
+        self.structure_predictor = ProteinStructurePredictor()
+        self.chat_assistant = LLMChatAssistant()
+        self.current_analysis = None
     def initialize_model(self):
         """Initialize the DNA-Diffusion model"""
         if not MODEL_AVAILABLE:
+            self.model_error = "DNA-Diffusion model module not available"
             return
         if self.model_loading:
             self.model_loading = False
     @spaces.GPU(duration=60)
+    def generate_and_analyze(self, cell_type: str, guidance_scale: float = 1.0, language: str = "en"):
+        """Generate sequence and perform comprehensive analysis"""
         try:
+            # Generate sequence
+            if MODEL_AVAILABLE and self.model:
+                result = self.model.generate(cell_type, guidance_scale)
+                sequence = result['sequence']
+            else:
+                # Mock generation
+                import random
+                sequence = ''.join(random.choice(['A', 'T', 'C', 'G']) for _ in range(200))
+            # Perform comprehensive analysis
+            analysis = self.analyze_sequence(sequence, cell_type)
+            # Store current analysis for chat context
+            self.current_analysis = {
+                'sequence': sequence,
+                'cell_type': cell_type,
+                'gc_content': analysis.gc_content,
+                'restriction_sites': analysis.restriction_sites,
+                'orfs': analysis.orfs,
+                'primers': analysis.primers
+            }
+            return json.dumps({
+                'sequence': sequence,
+                'analysis': {
+                    'gc_content': analysis.gc_content,
+                    'melting_temp': analysis.melting_temp,
+                    'restriction_sites': analysis.restriction_sites,
+                    'orfs': analysis.orfs,
+                    'primers': analysis.primers,
+                    'protein_analysis': analysis.protein_analysis
+                }
+            })
         except Exception as e:
+            logger.error(f"Generation failed: {e}")
+            return json.dumps({"error": str(e)})
+    def analyze_sequence(self, sequence: str, cell_type: str) -> AnalysisResult:
+        """Run every analysis step on ``sequence`` and bundle the results.
+
+        Args:
+            sequence: DNA sequence handed to each analyzer call.
+            cell_type: Accepted as part of the signature; not used in this method.
+
+        Returns:
+            An ``AnalysisResult`` holding the GC content, melting
+            temperature, restriction sites, ORFs, primers and the text
+            summary of the translated protein.
+        """
+        toolkit = self.analyzer
+        # Keyword arguments are evaluated top to bottom, so the steps run in
+        # this order: GC, Tm, restriction sites, ORFs, primers, then
+        # translation followed by the protein summary.
+        return AnalysisResult(
+            sequence=sequence,
+            gc_content=toolkit.calculate_gc_content(sequence),
+            melting_temp=toolkit.calculate_melting_temp(sequence),
+            restriction_sites=toolkit.find_restriction_sites(sequence),
+            orfs=toolkit.find_orfs(sequence),
+            primers=toolkit.design_primers(sequence),
+            protein_analysis=self.analyze_protein_basic(
+                self.translate_to_protein(sequence)
+            ),
+        )
+    def translate_to_protein(self, dna_sequence: str) -> str:
+        """Translate ``dna_sequence`` codon by codon, starting at index 0.
+
+        Codons missing from ``CODON_TABLE`` become ``'X'``. Translation stops
+        at the first codon that maps to ``'*'``, and a trailing partial codon
+        (one or two bases) is ignored.
+
+        Returns:
+            The amino-acid string, without the terminating ``'*'``.
+        """
+        residues = []
+        # Index one past the last complete codon.
+        usable = len(dna_sequence) - len(dna_sequence) % 3
+        start = 0
+        while start < usable:
+            residue = CODON_TABLE.get(dna_sequence[start:start + 3], 'X')
+            if residue == '*':
+                break
+            residues.append(residue)
+            start += 3
+        return ''.join(residues)
+    def analyze_protein_basic(self, protein_sequence: str) -> str:
+        """Summarise length, approximate mass and residue composition.
+
+        Args:
+            protein_sequence: One-letter amino-acid string.
+
+        Returns:
+            A multi-line text report, or ``"No protein sequence generated"``
+            when ``protein_sequence`` is empty.
+        """
+        if not protein_sequence:
+            return "No protein sequence generated"
+        # Calculate basic properties
+        length = len(protein_sequence)
+        # get_aa_weight() supplies free amino-acid masses. Each peptide bond
+        # releases one water molecule, so a chain of n residues weighs the sum
+        # of those masses minus (n - 1) waters.
+        water_mass = 18.015
+        molecular_weight = (
+            sum(self.get_aa_weight(aa) for aa in protein_sequence)
+            - water_mass * (length - 1)
+        )
+        # Count amino acid types
+        hydrophobic = sum(1 for aa in protein_sequence if aa in 'AILMFVPW')
+        charged = sum(1 for aa in protein_sequence if aa in 'DEKR')
+        # `length` is at least 1 here (empty input returned above), so the
+        # percentage divisions below cannot divide by zero.
+        analysis = f"""
+        Protein length: {length} amino acids
+        Molecular weight: ~{molecular_weight:.1f} Da
+        Hydrophobic residues: {hydrophobic} ({hydrophobic/length*100:.1f}%)
+        Charged residues: {charged} ({charged/length*100:.1f}%)
+        """
+        return analysis
+    def get_aa_weight(self, aa: str) -> float:
+        """Look up the molecular weight for a one-letter amino-acid code.
+
+        Returns ``100`` for any code that is not in the table.
+        """
+        table = {
+            'A': 89.1, 'C': 121.2, 'D': 133.1, 'E': 147.1, 'F': 165.2,
+            'G': 75.1, 'H': 155.2, 'I': 131.2, 'K': 146.2, 'L': 131.2,
+            'M': 149.2, 'N': 132.1, 'P': 115.1, 'Q': 146.2, 'R': 174.2,
+            'S': 105.1, 'T': 119.1, 'V': 117.1, 'W': 204.2, 'Y': 181.2,
+        }
+        try:
+            return table[aa]
+        except KeyError:
+            # Unknown code (for example the 'X' placeholder): fixed fallback.
+            return 100
+    async def handle_chat(self, message: str, language: str = "en") -> str:
+        """Forward a chat message to the assistant along with the stored analysis.
+
+        Returns a prompt to generate a sequence first when no analysis has
+        been stored yet; otherwise the awaited result of
+        ``self.chat_assistant.chat``.
+        """
+        context = self.current_analysis
+        if context:
+            return await self.chat_assistant.chat(message, context, language)
+        return "Please generate a sequence first to get context-aware assistance."
+    def export_results(self, format_type: str) -> str:
+        """Serialise the stored analysis as ``format_type``.
+
+        Args:
+            format_type: ``"genbank"``, ``"fasta"`` or ``"json"``
+                (compared case-sensitively).
+
+        Returns:
+            The exported text, ``"No analysis to export"`` when nothing is
+            stored, or ``"Unsupported format"`` for any other format name.
+        """
+        stored = self.current_analysis
+        if not stored:
+            return "No analysis to export"
+        if format_type == "genbank":
+            return self._export_genbank()
+        if format_type == "fasta":
+            return self._export_fasta()
+        if format_type == "json":
+            return json.dumps(stored, indent=2)
+        return "Unsupported format"
+    def _export_fasta(self) -> str:
+        """Render the stored sequence as a single FASTA record.
+
+        The header is ``>DNA_Diffusion_<cell_type>_<YYYYMMDD>`` using the
+        current local date; the whole sequence follows on one line.
+        """
+        record = self.current_analysis
+        header = ">DNA_Diffusion_{}_{}".format(
+            record['cell_type'], datetime.now().strftime('%Y%m%d')
+        )
+        return "{}\n{}".format(header, record['sequence'])
+    def _export_genbank(self) -> str:
+        """Render the stored analysis as a minimal GenBank-style record.
+
+        Only LOCUS, DEFINITION and ORIGIN are written, and the whole sequence
+        sits on a single ORIGIN line, so this is a simplified layout rather
+        than a complete GenBank flat file.
+        """
+        record = self.current_analysis
+        locus = "LOCUS       DNA_Diffusion   {} bp    DNA     linear   SYN {}".format(
+            len(record['sequence']), datetime.now().strftime('%d-%b-%Y')
+        )
+        definition = "DEFINITION  Synthetic DNA sequence for {}".format(record['cell_type'])
+        origin = "        1 {}".format(record['sequence'])
+        return "\n".join([locus, definition, "ORIGIN", origin, "//"])
 # Create single app instance
+# NOTE(review): the generate, chat, export and load callbacks wired in
+# create_enhanced_demo() all go through this one module-level object, so its
+# `current_analysis` looks shared across every browser session — confirm
+# that is intended.
+app = EnhancedDNAApp()
+def create_enhanced_demo():
+    """Create the enhanced Gradio interface.
+
+    Builds a ``gr.Blocks`` layout with four tabs (generate/analyze, AI
+    assistant, tools, export) and wires the generate, chat and export
+    callbacks to the module-level ``app`` instance.
+
+    Returns:
+        The assembled ``gr.Blocks`` object; the caller launches it.
+    """
+    with gr.Blocks(theme=gr.themes.Base()) as demo:
+        gr.Markdown("# 🧬 Enhanced DNA-Diffusion with Scientific Tools")
+        with gr.Tabs():
+            with gr.TabItem("🎰 Generate & Analyze"):
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        # Generation controls
+                        cell_type = gr.Radio(
+                            ["K562", "GM12878", "HepG2"],
+                            value="K562",
+                            label="Cell Type"
+                        )
+                        guidance_scale = gr.Slider(
+                            minimum=1.0,
+                            maximum=10.0,
+                            value=1.0,
+                            step=0.5,
+                            label="Guidance Scale"
+                        )
+                        # Also passed to the chat callback further down.
+                        language = gr.Radio(
+                            ["en", "ko"],
+                            value="en",
+                            label="Language"
+                        )
+                        generate_btn = gr.Button("🎲 Generate & Analyze", variant="primary")
+                    with gr.Column(scale=3):
+                        # Results display
+                        # Hidden component: receives generate_and_analyze's output
+                        # and is the input of visualize_results.
+                        results_json = gr.JSON(label="Analysis Results", visible=False)
+                        # Visual results
+                        with gr.Accordion("📊 Sequence Analysis", open=True):
+                            gc_plot = gr.Plot(label="GC Content Distribution")
+                            restriction_map = gr.Plot(label="Restriction Enzyme Map")
+                        with gr.Accordion("🧬 Protein Analysis", open=True):
+                            protein_structure = gr.HTML(label="Predicted Structure")
+                            protein_properties = gr.Textbox(label="Properties", lines=5)
+            with gr.TabItem("💬 AI Assistant"):
+                chatbot = gr.Chatbot(label="Scientific Assistant", height=400)
+                msg = gr.Textbox(label="Ask about your sequence", placeholder="e.g., 'What primers would you recommend?'")
+                chat_btn = gr.Button("Send")
+                # Chat examples
+                gr.Examples(
+                    examples=[
+                        "What restriction enzymes should I use for cloning?",
+                        "Can you explain the ORFs found in this sequence?",
+                        "How can I optimize this sequence for E. coli expression?",
+                        "What's the predicted protein structure?"
+                    ],
+                    inputs=msg
+                )
+            with gr.TabItem("🔧 Tools"):
+                # NOTE(review): design_primers_btn and optimize_btn are created
+                # here but get no click handler anywhere in this function.
+                with gr.Row():
+                    with gr.Column():
+                        gr.Markdown("### Primer Design")
+                        primer_length = gr.Slider(18, 25, 20, label="Primer Length")
+                        product_size = gr.Slider(200, 1000, 500, label="Product Size")
+                        design_primers_btn = gr.Button("Design Primers")
+                        primer_results = gr.JSON(label="Designed Primers")
+                    with gr.Column():
+                        gr.Markdown("### Codon Optimization")
+                        target_organism = gr.Dropdown(
+                            ["E. coli", "Yeast", "Human", "Mouse"],
+                            value="E. coli",
+                            label="Target Organism"
+                        )
+                        optimize_btn = gr.Button("Optimize Codons")
+                        optimized_seq = gr.Textbox(label="Optimized Sequence", lines=5)
+            with gr.TabItem("📤 Export"):
+                export_format = gr.Radio(
+                    ["FASTA", "GenBank", "JSON"],
+                    value="FASTA",
+                    label="Export Format"
+                )
+                export_btn = gr.Button("Export Results")
+                export_output = gr.Textbox(label="Exported Data", lines=10)
+        # Wire up the interface
+        # Two-step chain: the click fills the hidden JSON component, then
+        # visualize_results turns that payload into the plots and protein panels.
         generate_btn.click(
+            fn=app.generate_and_analyze,
+            inputs=[cell_type, guidance_scale, language],
+            outputs=[results_json]
         ).then(
+            fn=visualize_results,
+            inputs=[results_json],
+            outputs=[gc_plot, restriction_map, protein_structure, protein_properties]
         )
+        # Chat functionality
+        def respond(message, chat_history, language):
+            # handle_chat is a coroutine; asyncio.run drives it to completion
+            # from this synchronous callback.
+            import asyncio
+            response = asyncio.run(app.handle_chat(message, language))
+            chat_history.append((message, response))
+            # The first output is bound to `msg`, so "" clears the input box.
+            return "", chat_history
+        # Enter in the textbox and the Send button trigger the same handler.
+        msg.submit(respond, [msg, chatbot, language], [msg, chatbot])
+        chat_btn.click(respond, [msg, chatbot, language], [msg, chatbot])
+        # Export functionality
+        # The radio labels are mixed-case; lower-casing maps them onto the
+        # format names export_results compares against.
+        export_btn.click(
+            fn=lambda fmt: app.export_results(fmt.lower()),
+            inputs=[export_format],
+            outputs=[export_output]
         )
+        # Initialize model on load
+        demo.load(fn=app.initialize_model)
     return demo
+def visualize_results(results_json):
+    """Create visualizations from analysis results.
+
+    Args:
+        results_json: The analysis payload, either as a JSON string or as the
+            already-parsed dict. It is expected to carry ``'sequence'`` and
+            ``'analysis'`` keys, or an ``'error'`` key.
+
+    Returns:
+        A 4-tuple ``(gc_figure, restriction_figure, structure_html,
+        protein_properties)``. When the payload is missing, is not a JSON
+        object, or carries an ``"error"`` key, both figures are ``None`` and
+        the two text slots hold short error markers.
+
+    Raises:
+        json.JSONDecodeError: If ``results_json`` is a string that is not
+            valid JSON.
+    """
+    data = json.loads(results_json) if isinstance(results_json, str) else results_json
+    # An upstream step that produced nothing hands over None; treat that like
+    # a reported error instead of raising on the membership test.
+    if not isinstance(data, dict) or "error" in data:
+        return None, None, "<p>Error in analysis</p>", "Error"
+    # Imported only once there is something to draw. Figures are built with
+    # the object-oriented API rather than pyplot: pyplot keeps every figure in
+    # a global registry until it is explicitly closed, which accumulates
+    # memory in a long-running server.
+    from matplotlib.figure import Figure
+    analysis = data.get('analysis', {})
+    # GC content plot
+    gc_fig = Figure(figsize=(8, 4))
+    gc_ax = gc_fig.subplots()
+    gc_content = analysis.get('gc_content', 0)
+    gc_ax.bar(['GC%', 'AT%'], [gc_content, 100 - gc_content], color=['#00ff00', '#ff0000'])
+    gc_ax.set_ylabel('Percentage')
+    gc_ax.set_title('Nucleotide Composition')
+    # Restriction map: one horizontal lane per enzyme, one tick per cut site.
+    map_fig = Figure(figsize=(10, 3))
+    map_ax = map_fig.subplots()
+    sites = analysis.get('restriction_sites', {})
+    seq_len = len(data.get('sequence', ''))
+    y_pos = 0
+    for enzyme, positions in sites.items():
+        for pos in positions:
+            map_ax.plot([pos, pos], [y_pos - 0.1, y_pos + 0.1], 'r-', linewidth=2)
+            map_ax.text(pos, y_pos + 0.15, enzyme, fontsize=8, ha='center')
+        y_pos += 0.3
+    map_ax.set_xlim(0, seq_len)
+    map_ax.set_ylim(-0.5, max(0.5, y_pos))
+    map_ax.set_xlabel('Position (bp)')
+    map_ax.set_title('Restriction Enzyme Sites')
+    # Protein structure (mock visualization): the percentages are fixed text,
+    # not computed from the sequence. Gradient stops are cumulative
+    # (45% / 75% / 100%) and doubled so each band is a solid block; stops that
+    # decrease get clamped by the browser and the middle band disappears.
+    structure_html = """
+    <div style="padding: 20px; background: #f0f0f0; border-radius: 10px;">
+        <h3>🔬 Predicted Secondary Structure</h3>
+        <p>Helices: 45%, Beta sheets: 30%, Loops: 25%</p>
+        <div style="background: linear-gradient(to right, #ff0000 0%, #ff0000 45%, #00ff00 45%, #00ff00 75%, #0000ff 75%, #0000ff 100%);
+                    height: 30px; border-radius: 5px; margin: 10px 0;"></div>
+        <p style="color: #666;">3D structure prediction available in Pro version</p>
+    </div>
+    """
+    # Protein properties
+    properties = analysis.get('protein_analysis', 'No analysis available')
+    return gc_fig, map_fig, structure_html, properties
+# Launch the enhanced app
+# Runs only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    demo = create_enhanced_demo()
+    # NOTE(review): share=True presumably requests a public Gradio share link
+    # in addition to the local server — confirm that is wanted for the
+    # deployed app.
+    demo.launch(share=True)