Spaces:
Runtime error
Runtime error
| # For God so loved the world that he gave his only begotten Son, | |
| # that whoever believes in him should not perish but have eternal life. - John 3:16 | |
| """ | |
| app.py - HuggingFace Space for Model 9: Evangelism & Apologetics Pipeline. | |
| Full 4-stage pipeline: Intent Classifier -> Retriever -> Generator -> Theological Guardrails. | |
| Uses ZeroGPU (@spaces.GPU) for on-demand GPU allocation during inference. | |
| """ | |
| import json | |
| from dataclasses import dataclass, field | |
| import gradio as gr | |
| import numpy as np | |
| import spaces | |
| import torch | |
| from peft import PeftModel | |
| from sentence_transformers import SentenceTransformer | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoModelForSeq2SeqLM, | |
| AutoModelForSequenceClassification, | |
| AutoTokenizer, | |
| BitsAndBytesConfig, | |
| ) | |
# ─── HuggingFace Model IDs ───
# Hub repositories for the four pipeline stages and the retrieval corpus.
INTENT_ID_CHIRHO = "LoveJesus/evangelism-intent-classifier-chirho"
RETRIEVER_ID_CHIRHO = "LoveJesus/evangelism-retriever-chirho"
GENERATOR_ID_CHIRHO = "LoveJesus/evangelism-generator-chirho"
DATASET_ID_CHIRHO = "LoveJesus/evangelism-dataset-chirho"
GUARDRAILS_CLASSIFIER_ID_CHIRHO = "LoveJesus/theologian-classifier-chirho"
GUARDRAILS_EMBEDDER_ID_CHIRHO = "LoveJesus/theologian-embedder-chirho"
GUARDRAILS_EXPLAINER_ID_CHIRHO = "LoveJesus/theologian-explainer-chirho"
# Base checkpoint that the generator's LoRA adapter is applied to.
GENERATOR_BASE_ID_CHIRHO = "Qwen/Qwen3-14B"

# ─── Constants ───
# Position i is used as the label for intent-classifier output index i,
# so the order of this list must not change.
INTENT_LABELS_CHIRHO = [
    "evangelism_dialogue",
    "apologetics_qa",
    "creation_science",
    "historical_evidence",
    "miracle_testimony",
]

# Human-readable names shown in the UI for each intent label.
INTENT_DISPLAY_CHIRHO = {
    "evangelism_dialogue": "Evangelism Dialogue",
    "apologetics_qa": "Apologetics Q&A",
    "creation_science": "Creation Science",
    "historical_evidence": "Historical Evidence",
    "miracle_testimony": "Miracle Testimony",
}

# Position i labels guardrails-classifier output index i; index 0 is the
# orthodox class and every later index is treated as a heresy class.
GUARDRAILS_LABELS_CHIRHO = [
    "orthodox_chirho",
    "arianism_chirho",
    "pelagianism_chirho",
    "gnosticism_chirho",
    "modalism_chirho",
    "docetism_chirho",
    "nestorianism_chirho",
    "marcionism_chirho",
    "apollinarianism_chirho",
    "monothelitism_chirho",
    "semi_pelagianism_chirho",
    "adoptionism_chirho",
    "patripassianism_chirho",
]

# System message placed first in every generator conversation.
SYSTEM_PROMPT_CHIRHO = (
    "You are a knowledgeable Christian apologist and evangelist. "
    "Answer questions with Scripture references, sound reasoning, and a heart "
    "for sharing the Gospel of Jesus Christ. All answers should be grounded in "
    "biblical truth (2 Timothy 3:16). Be respectful, thorough, and always point to Christ."
)

# Reference statements whose mean embedding forms the "orthodox centroid"
# that generated text is compared against.
ORTHODOX_STATEMENTS_CHIRHO = [
    "Jesus Christ is truly God and truly man, one person with two natures.",
    "We worship one God in Trinity and Trinity in Unity.",
    "The Son is eternally begotten of the Father, not made, of one Being with the Father.",
    "The Holy Spirit proceeds from the Father and is worshipped with the Father and Son.",
    "Christ has two wills, divine and human, the human freely submitting to the divine.",
    "In the incarnation, the Word became flesh and dwelt among us.",
    "Salvation is by grace through faith, the gift of God.",
    "God created the heavens and the earth, and all that he made was very good.",
]

# Token limits for the guardrails classifier input and the explainer
# input / output.
MAX_GUARDRAILS_LENGTH_CHIRHO = 256
MAX_EXPLAINER_INPUT_LENGTH_CHIRHO = 512
MAX_EXPLAINER_OUTPUT_LENGTH_CHIRHO = 256
@dataclass
class GuardrailsResultChirho:
    """Result from the theological guardrails check.

    Declared as a dataclass so that it can be built with
    ``GuardrailsResultChirho(text_chirho=...)`` and so that the
    ``field(default_factory=...)`` defaults produce a fresh dict / list for
    every instance instead of a shared ``Field`` placeholder.
    """

    # The text that was checked.
    text_chirho: str
    # Overall verdict; the guardrails check sets "orthodox", "heterodox" or
    # "uncertain", and "unknown" means no verdict was recorded.
    overall_label_chirho: str = "unknown"
    # Classifier score backing the overall verdict.
    confidence_chirho: float = 0.0
    # Per-label classifier score, keyed by guardrails label.
    heresy_scores_chirho: dict = field(default_factory=dict)
    # Heresy labels whose score exceeded the decision threshold.
    top_heresies_chirho: list = field(default_factory=list)
    # Free-text explanation produced by the explainer model.
    explanation_chirho: str = ""
    # Cosine similarity between the text embedding and the orthodox centroid.
    embedding_similarity_chirho: float = 0.0
# ─── Global Model Holders ───
# Module-level slots filled in by load_models_chirho(). A slot that is still
# None after loading means that component is unavailable.

# Stage 1: intent classifier
intent_model_chirho = None
intent_tokenizer_chirho = None

# Stage 2: retriever, the passages it searches and their embedding matrix
retriever_chirho = None
corpus_passages_chirho = []
corpus_embeddings_chirho = None

# Stage 3: generator
generator_model_chirho = None
generator_tokenizer_chirho = None

# Stage 4: theological guardrails (classifier, embedder, explainer)
guardrails_classifier_chirho = None
guardrails_classifier_tokenizer_chirho = None
guardrails_embedder_chirho = None
guardrails_explainer_chirho = None
guardrails_explainer_tokenizer_chirho = None
orthodox_centroid_chirho = None

# Torch device the classifier / explainer models are moved to
device_chirho = None
| def _set_inference_mode_chirho(model_chirho): | |
| """Set a PyTorch model to inference mode (disables dropout, batchnorm training).""" | |
| model_chirho.train(False) | |
# ─── Model Loading ───
def load_models_chirho():
    """Load all pipeline models: intent classifier, retriever, generator, and guardrails.

    Fills the module-level holders in this order:

    1. Intent classifier and its tokenizer.
    2. Retriever, then the corpus passages and their embedding matrix.
    3. Generator (4-bit quantized base model plus LoRA adapter).
    4. Guardrails classifier, embedder (plus orthodox centroid) and explainer.

    Failures while loading the intent classifier or the retriever propagate
    to the caller. The corpus, the generator and each guardrails component
    are best-effort: an exception is printed as a warning and the affected
    holder keeps its previous value (``None`` on first load).

    Corpus state is reset before loading, so calling this function again
    rebuilds the index instead of appending duplicate passages to the
    previous one.
    """
    global intent_model_chirho, intent_tokenizer_chirho
    global retriever_chirho, corpus_passages_chirho, corpus_embeddings_chirho
    global generator_model_chirho, generator_tokenizer_chirho
    global guardrails_classifier_chirho, guardrails_classifier_tokenizer_chirho
    global guardrails_embedder_chirho
    global guardrails_explainer_chirho, guardrails_explainer_tokenizer_chirho
    global orthodox_centroid_chirho, device_chirho

    # ZeroGPU pattern: load models to cuda at startup; ZeroGPU intercepts .to('cuda')
    device_chirho = torch.device("cuda")
    print("Loading all models (ZeroGPU intercepts CUDA calls at startup)")

    # ── Stage 1: Intent Classifier (RoBERTa-base, ~500MB) ──
    print("Loading intent classifier...")
    intent_tokenizer_chirho = AutoTokenizer.from_pretrained(INTENT_ID_CHIRHO)
    intent_model_chirho = AutoModelForSequenceClassification.from_pretrained(INTENT_ID_CHIRHO)
    intent_model_chirho.to(device_chirho)
    _set_inference_mode_chirho(intent_model_chirho)
    print(" Intent classifier loaded.")

    # ── Stage 2: Retriever (MiniLM-L12, ~128MB) ──
    print("Loading retriever...")
    retriever_chirho = SentenceTransformer(RETRIEVER_ID_CHIRHO, device=str(device_chirho))
    print(" Retriever loaded.")

    # ── Corpus Loading ──
    print("Loading corpus...")
    # Start from an empty index so that passages and embeddings always
    # describe the same load: a repeated call must not append duplicates,
    # and a failed reload must not leave embeddings from an earlier corpus.
    corpus_passages_chirho.clear()
    corpus_embeddings_chirho = None
    try:
        from huggingface_hub import hf_hub_download

        corpus_files_chirho = [
            "raw-chirho/apologetics-chirho/gotquestions-chirho.jsonl",
            "raw-chirho/apologetics-chirho/apologetics-articles-chirho.jsonl",
            "raw-chirho/evidence-chirho/evidence-expanded-chirho.jsonl",
            "raw-chirho/evidence-chirho/creation-science-chirho.jsonl",
            "raw-chirho/evidence-chirho/historical-evidence-chirho.jsonl",
            "raw-chirho/miracles-chirho/miracle-testimony-expanded-chirho.jsonl",
            "raw-chirho/fathers-chirho/early-fathers-apologetics-chirho.jsonl",
            "raw-chirho/sermons-chirho/spurgeon-sermons-chirho.jsonl",
            "raw-chirho/dialogues-chirho/compiled-dialogues-chirho.jsonl",
            "raw-chirho/dialogues-chirho/evangelism-dialogues-seed-chirho.jsonl",
        ]
        for filename_chirho in corpus_files_chirho:
            # Each file is independent: a failure skips the remainder of
            # that file but the other files are still loaded.
            try:
                path_chirho = hf_hub_download(
                    repo_id=DATASET_ID_CHIRHO,
                    filename=filename_chirho,
                    repo_type="dataset",
                )
                with open(path_chirho, "r", encoding="utf-8") as f_chirho:
                    for line_chirho in f_chirho:
                        line_chirho = line_chirho.strip()
                        if not line_chirho:
                            continue
                        try:
                            entry_chirho = json.loads(line_chirho)
                        except json.JSONDecodeError:
                            # Skip malformed JSONL lines.
                            continue
                        text_chirho = _extract_text_chirho(entry_chirho)
                        # Very short snippets are not indexed.
                        if text_chirho and len(text_chirho) > 30:
                            corpus_passages_chirho.append({
                                "text_chirho": text_chirho.strip(),
                                "source_chirho": entry_chirho.get("source_chirho", "unknown"),
                                "scripture_chirho": entry_chirho.get("scripture_chirho", []),
                                "question_chirho": entry_chirho.get("question_chirho", ""),
                                "category_chirho": entry_chirho.get("category_chirho", ""),
                            })
            except Exception as e_chirho:
                print(f" Warning: Could not load {filename_chirho}: {e_chirho}")
        print(f" Loaded {len(corpus_passages_chirho)} passages.")

        if corpus_passages_chirho:
            print(" Encoding corpus (this may take a moment)...")
            texts_chirho = [p_chirho["text_chirho"][:512] for p_chirho in corpus_passages_chirho]
            # Normalized embeddings let retrieval use a plain dot product
            # as the similarity score.
            corpus_embeddings_chirho = retriever_chirho.encode(
                texts_chirho,
                batch_size=128,
                show_progress_bar=True,
                convert_to_numpy=True,
                normalize_embeddings=True,
            )
            print(" Corpus index built!")
    except Exception as e_chirho:
        print(f" Warning: Could not load corpus: {e_chirho}")

    # ── Stage 3: Generator (Qwen3-14B + LoRA, 4-bit quantized) ──
    print("Loading generator (Qwen3-14B + LoRA, 4-bit NF4)...")
    try:
        generator_tokenizer_chirho = AutoTokenizer.from_pretrained(
            GENERATOR_ID_CHIRHO, subfolder="best-chirho"
        )
        # Generation needs a pad token id; fall back to EOS when none is set.
        if generator_tokenizer_chirho.pad_token is None:
            generator_tokenizer_chirho.pad_token = generator_tokenizer_chirho.eos_token
        bnb_config_chirho = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
        base_model_chirho = AutoModelForCausalLM.from_pretrained(
            GENERATOR_BASE_ID_CHIRHO,
            quantization_config=bnb_config_chirho,
            device_map="auto",
            trust_remote_code=True,
        )
        generator_model_chirho = PeftModel.from_pretrained(
            base_model_chirho, GENERATOR_ID_CHIRHO, subfolder="best-chirho"
        )
        _set_inference_mode_chirho(generator_model_chirho)
        print(" Generator loaded (4-bit, ~7GB VRAM).")
    except Exception as e_chirho:
        print(f" Warning: Could not load generator: {e_chirho}")
        import traceback

        traceback.print_exc()

    # ── Stage 4: Theological Guardrails (~2.4GB total) ──
    print("Loading theological guardrails...")

    # Guardrails Classifier (RoBERTa-large, ~1.3GB)
    try:
        guardrails_classifier_tokenizer_chirho = AutoTokenizer.from_pretrained(
            GUARDRAILS_CLASSIFIER_ID_CHIRHO
        )
        guardrails_classifier_chirho = AutoModelForSequenceClassification.from_pretrained(
            GUARDRAILS_CLASSIFIER_ID_CHIRHO
        )
        guardrails_classifier_chirho.to(device_chirho)
        _set_inference_mode_chirho(guardrails_classifier_chirho)
        print(" Guardrails classifier loaded.")
    except Exception as e_chirho:
        print(f" Warning: Could not load guardrails classifier: {e_chirho}")

    # Guardrails Embedder (MiniLM-L12, ~127MB)
    try:
        guardrails_embedder_chirho = SentenceTransformer(
            GUARDRAILS_EMBEDDER_ID_CHIRHO, device=str(device_chirho)
        )
        # Precompute orthodox centroid
        embeddings_chirho = guardrails_embedder_chirho.encode(ORTHODOX_STATEMENTS_CHIRHO)
        orthodox_centroid_chirho = np.mean(embeddings_chirho, axis=0)
        print(" Guardrails embedder loaded + orthodox centroid computed.")
    except Exception as e_chirho:
        print(f" Warning: Could not load guardrails embedder: {e_chirho}")

    # Guardrails Explainer (Flan-T5-base, ~944MB)
    try:
        guardrails_explainer_tokenizer_chirho = AutoTokenizer.from_pretrained(
            GUARDRAILS_EXPLAINER_ID_CHIRHO
        )
        guardrails_explainer_chirho = AutoModelForSeq2SeqLM.from_pretrained(
            GUARDRAILS_EXPLAINER_ID_CHIRHO
        )
        guardrails_explainer_chirho.to(device_chirho)
        _set_inference_mode_chirho(guardrails_explainer_chirho)
        print(" Guardrails explainer loaded.")
    except Exception as e_chirho:
        print(f" Warning: Could not load guardrails explainer: {e_chirho}")

    print("All models loaded!")
| def _extract_text_chirho(entry_chirho): | |
| """Extract searchable text from a corpus entry.""" | |
| if "answer_chirho" in entry_chirho: | |
| return entry_chirho["answer_chirho"][:1000] | |
| elif "evidence_chirho" in entry_chirho: | |
| claim_chirho = entry_chirho.get("claim_chirho", "") | |
| evidence_chirho = entry_chirho.get("evidence_chirho", "") | |
| return f"{claim_chirho}\n{evidence_chirho}"[:1000] | |
| elif "text_chirho" in entry_chirho: | |
| return entry_chirho["text_chirho"][:1000] | |
| elif "argument_chirho" in entry_chirho: | |
| argument_chirho = entry_chirho.get("argument_chirho", "") | |
| context_chirho = entry_chirho.get("context_chirho", "") | |
| return f"{argument_chirho}\n{context_chirho}"[:1000] | |
| return "" | |
# ─── Stage 3: Generation ───
def generate_response_chirho(question_chirho, intent_chirho, retrieved_passages_chirho):
    """Produce the generator's answer to ``question_chirho``.

    Args:
        question_chirho: The user's question.
        intent_chirho: Intent label for the question. ``"evangelism_dialogue"``
            sends the question as-is; any other label wraps it in a prompt
            that lists the retrieved passages first.
        retrieved_passages_chirho: Passage dicts with a ``"text_chirho"`` key;
            at most the first five are used, each cut to 400 characters.

    Returns:
        The stripped generated text, or ``None`` when no generator is loaded.
    """
    if generator_model_chirho is None:
        return None

    if intent_chirho == "evangelism_dialogue":
        # Direct dialogue: no retrieved context needed
        user_content_chirho = question_chirho
    else:
        # RAG: include top retrieved passages as numbered context
        numbered_chirho = [
            "[{}] {}\n\n".format(rank_chirho, hit_chirho["text_chirho"][:400])
            for rank_chirho, hit_chirho in enumerate(retrieved_passages_chirho[:5], 1)
        ]
        context_text_chirho = "".join(numbered_chirho)
        user_content_chirho = (
            f"Context passages:\n{context_text_chirho}\n"
            f"Question: {question_chirho}\n\n"
            "Using the context passages above and your knowledge of Scripture, "
            "provide a thorough answer with Bible references."
        )

    messages_chirho = [
        {"role": "system", "content": SYSTEM_PROMPT_CHIRHO},
        {"role": "user", "content": user_content_chirho},
    ]
    prompt_chirho = generator_tokenizer_chirho.apply_chat_template(
        messages_chirho, tokenize=False, add_generation_prompt=True
    )
    encoded_chirho = generator_tokenizer_chirho(
        prompt_chirho, return_tensors="pt", truncation=True, max_length=2048
    )
    encoded_chirho = encoded_chirho.to(generator_model_chirho.device)

    with torch.no_grad():
        output_ids_chirho = generator_model_chirho.generate(
            **encoded_chirho,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=generator_tokenizer_chirho.pad_token_id,
        )

    # The output sequence starts with the prompt tokens; keep only what
    # follows them.
    prompt_len_chirho = encoded_chirho["input_ids"].shape[-1]
    new_token_ids_chirho = output_ids_chirho[0][prompt_len_chirho:]
    answer_chirho = generator_tokenizer_chirho.decode(
        new_token_ids_chirho, skip_special_tokens=True
    )
    return answer_chirho.strip()
# ─── Stage 4: Theological Guardrails ───
def check_guardrails_chirho(text_chirho, threshold_chirho=0.5):
    """Run the theological guardrails pipeline on generated text.

    Three steps are applied:

    1. Classify: per-label sigmoid scores decide the verdict. The text is
       "orthodox" when the orthodox score is above the threshold and every
       heresy score is below it, "heterodox" when any heresy score is above
       the threshold, and "uncertain" otherwise.
    2. Embed: cosine similarity between the text embedding and the
       orthodox centroid.
    3. Explain: a generated explanation of the verdict.

    The embedder and the explainer are loaded independently of the
    classifier and may be missing. A missing component only skips its own
    step, leaving the corresponding result field at its default, instead of
    failing the whole check.

    Args:
        text_chirho: Text to verify.
        threshold_chirho: Score threshold used for the verdict.

    Returns:
        A ``GuardrailsResultChirho``, or ``None`` when no classifier is loaded.
    """
    if guardrails_classifier_chirho is None:
        return None

    result_chirho = GuardrailsResultChirho(text_chirho=text_chirho)

    # Step 1: Classify
    inputs_chirho = guardrails_classifier_tokenizer_chirho(
        text_chirho, return_tensors="pt",
        max_length=MAX_GUARDRAILS_LENGTH_CHIRHO,
        truncation=True, padding="max_length",
    )
    inputs_chirho = {k_chirho: v_chirho.to(device_chirho) for k_chirho, v_chirho in inputs_chirho.items()}
    with torch.no_grad():
        outputs_chirho = guardrails_classifier_chirho(**inputs_chirho)
        # Sigmoid rather than softmax: every label is scored independently.
        scores_chirho = torch.sigmoid(outputs_chirho.logits).cpu().numpy()[0]

    for i_chirho, label_chirho in enumerate(GUARDRAILS_LABELS_CHIRHO):
        result_chirho.heresy_scores_chirho[label_chirho] = float(scores_chirho[i_chirho])

    # Index 0 is the orthodox class; all remaining indices are heresies.
    orthodox_score_chirho = scores_chirho[0]
    heresy_scores_chirho = scores_chirho[1:]
    max_heresy_score_chirho = float(np.max(heresy_scores_chirho)) if len(heresy_scores_chirho) > 0 else 0.0

    if orthodox_score_chirho > threshold_chirho and max_heresy_score_chirho < threshold_chirho:
        result_chirho.overall_label_chirho = "orthodox"
        result_chirho.confidence_chirho = float(orthodox_score_chirho)
    elif max_heresy_score_chirho > threshold_chirho:
        result_chirho.overall_label_chirho = "heterodox"
        result_chirho.confidence_chirho = float(max_heresy_score_chirho)
        result_chirho.top_heresies_chirho = [
            GUARDRAILS_LABELS_CHIRHO[i_chirho + 1]  # +1 skips the orthodox label
            for i_chirho, s_chirho in enumerate(heresy_scores_chirho)
            if s_chirho > threshold_chirho
        ]
    else:
        result_chirho.overall_label_chirho = "uncertain"
        result_chirho.confidence_chirho = float(max(orthodox_score_chirho, max_heresy_score_chirho))

    # Step 2: Embed and compare to orthodox centroid (skipped when the
    # embedder or its centroid did not load).
    if guardrails_embedder_chirho is not None and orthodox_centroid_chirho is not None:
        embedding_chirho = guardrails_embedder_chirho.encode([text_chirho])[0]
        dot_chirho = np.dot(embedding_chirho, orthodox_centroid_chirho)
        denom_chirho = np.linalg.norm(embedding_chirho) * np.linalg.norm(orthodox_centroid_chirho)
        # A zero-length vector has no direction; report 0.0 instead of
        # dividing by zero.
        if denom_chirho > 0:
            result_chirho.embedding_similarity_chirho = float(dot_chirho / denom_chirho)

    # Step 3: Explain (skipped when the explainer did not load).
    if guardrails_explainer_chirho is not None and guardrails_explainer_tokenizer_chirho is not None:
        heresy_str_chirho = (
            ", ".join(result_chirho.top_heresies_chirho) if result_chirho.top_heresies_chirho else "none"
        )
        input_text_chirho = (
            f"explain theological classification: {text_chirho} | "
            f"label: {result_chirho.overall_label_chirho} | "
            f"heresy types: {heresy_str_chirho}"
        )
        exp_inputs_chirho = guardrails_explainer_tokenizer_chirho(
            input_text_chirho, return_tensors="pt",
            max_length=MAX_EXPLAINER_INPUT_LENGTH_CHIRHO, truncation=True,
        )
        exp_inputs_chirho = {k_chirho: v_chirho.to(device_chirho) for k_chirho, v_chirho in exp_inputs_chirho.items()}
        with torch.no_grad():
            exp_outputs_chirho = guardrails_explainer_chirho.generate(
                **exp_inputs_chirho,
                max_length=MAX_EXPLAINER_OUTPUT_LENGTH_CHIRHO,
                num_beams=4,
                early_stopping=True,
            )
        result_chirho.explanation_chirho = guardrails_explainer_tokenizer_chirho.decode(
            exp_outputs_chirho[0], skip_special_tokens=True
        )

    return result_chirho
# ─── Tab Functions ───
# NOTE(review): the module docstring says inference uses @spaces.GPU, but no
# decorator is visible on this function — confirm it was not lost.
def classify_intent_tab_chirho(question_chirho: str) -> tuple:
    """Tab: Classify the intent of a question.

    Returns a pair of Markdown strings: the predicted intent with its
    confidence, and a table of every intent's probability sorted from
    highest to lowest. A blank question returns a prompt message and an
    empty second string.
    """
    if not question_chirho.strip():
        return "Please enter a question.", ""

    encoded_chirho = intent_tokenizer_chirho(
        question_chirho,
        truncation=True,
        max_length=128,
        padding="max_length",
        return_tensors="pt",
    )
    encoded_chirho = {
        name_chirho: tensor_chirho.to(device_chirho)
        for name_chirho, tensor_chirho in encoded_chirho.items()
    }
    with torch.no_grad():
        logits_chirho = intent_model_chirho(**encoded_chirho).logits
        probs_chirho = torch.softmax(logits_chirho, dim=-1).cpu().numpy()[0]

    best_idx_chirho = int(np.argmax(probs_chirho))
    best_display_chirho = INTENT_DISPLAY_CHIRHO[INTENT_LABELS_CHIRHO[best_idx_chirho]]
    best_prob_chirho = float(probs_chirho[best_idx_chirho])
    headline_chirho = f"## {best_display_chirho}\n**Confidence:** {best_prob_chirho:.1%}"

    def _row_chirho(idx_chirho):
        """Format one table row: display name, score and a 20-step '=' bar."""
        name_chirho = INTENT_DISPLAY_CHIRHO[INTENT_LABELS_CHIRHO[idx_chirho]]
        score_chirho = float(probs_chirho[idx_chirho])
        bar_chirho = "=" * int(score_chirho * 20)
        return f"| {name_chirho} | {score_chirho:.3f} {bar_chirho} |"

    table_chirho = ["| Category | Score |", "| --- | --- |"]
    # argsort is ascending; reverse it so the most likely intent comes first.
    table_chirho.extend(_row_chirho(idx_chirho) for idx_chirho in np.argsort(probs_chirho)[::-1])
    return headline_chirho, "\n".join(table_chirho)
# NOTE(review): the module docstring says inference uses @spaces.GPU, but no
# decorator is visible on this function — confirm it was not lost.
def retrieve_passages_tab_chirho(query_chirho: str, k_chirho: int = 5) -> str:
    """Tab: Retrieve relevant passages for a question.

    Embeds the query, scores it against every corpus embedding with a dot
    product and renders the ``k_chirho`` best passages as Markdown. A blank
    query or a missing corpus index returns a short message instead.
    """
    if not query_chirho.strip():
        return "Please enter a question."
    if corpus_embeddings_chirho is None:
        return "Corpus index not available. Please try again later."

    query_vec_chirho = retriever_chirho.encode(
        [query_chirho], normalize_embeddings=True, convert_to_numpy=True
    )[0]
    scores_chirho = np.dot(corpus_embeddings_chirho, query_vec_chirho)
    # argsort is ascending; reverse and cut to the requested number of hits.
    best_chirho = np.argsort(scores_chirho)[::-1][:int(k_chirho)]

    blocks_chirho = []
    for rank_chirho, idx_chirho in enumerate(best_chirho, 1):
        hit_chirho = corpus_passages_chirho[idx_chirho]
        score_chirho = float(scores_chirho[idx_chirho])
        blocks_chirho.append(f"### #{rank_chirho} (similarity: {score_chirho:.3f})")

        asked_chirho = hit_chirho.get("question_chirho", "")
        if asked_chirho:
            blocks_chirho.append(f"**Original question:** {asked_chirho}")

        preview_chirho = hit_chirho["text_chirho"][:300]
        blocks_chirho.append(f"{preview_chirho}...")

        refs_chirho = hit_chirho.get("scripture_chirho", [])
        if refs_chirho:
            if isinstance(refs_chirho, list):
                # Show at most five references.
                joined_chirho = ", ".join(str(ref_chirho) for ref_chirho in refs_chirho[:5])
                blocks_chirho.append(f"**Scripture:** {joined_chirho}")
            else:
                blocks_chirho.append(f"**Scripture:** {refs_chirho}")

        origin_chirho = hit_chirho.get("source_chirho", "unknown")
        blocks_chirho.append(f"*Source: {origin_chirho}*")
        blocks_chirho.append("---")

    return "\n\n".join(blocks_chirho)
# NOTE(review): the module docstring says inference uses @spaces.GPU, but no
# decorator is visible on this function — confirm it was not lost.
def full_pipeline_tab_chirho(question_chirho: str, k_chirho: int = 5) -> tuple:
    """Tab: Full 4-stage pipeline - classify + retrieve + generate + guardrails.

    Args:
        question_chirho: The user's question.
        k_chirho: Number of passages to retrieve.

    Returns:
        Four Markdown strings, one per stage: intent classification,
        retrieved passages, generated response and guardrails verdict.
        A blank question returns a prompt message followed by three empty
        strings.

    The guardrails stage reports "skipped" whenever there is no generated
    text to verify, including an empty generation, and "not available" only
    when text exists but no classifier is loaded. An exception raised by
    the guardrails check is reported as a failed check, so the output of
    the first three stages is still returned.
    """
    if not question_chirho.strip():
        return "Please enter a question.", "", "", ""

    # ── Stage 1: Classify Intent ──
    inputs_chirho = intent_tokenizer_chirho(
        question_chirho,
        truncation=True,
        max_length=128,
        padding="max_length",
        return_tensors="pt",
    )
    inputs_chirho = {k_c: v_c.to(device_chirho) for k_c, v_c in inputs_chirho.items()}
    with torch.no_grad():
        outputs_chirho = intent_model_chirho(**inputs_chirho)
        probs_chirho = torch.softmax(outputs_chirho.logits, dim=-1).cpu().numpy()[0]

    pred_idx_chirho = int(np.argmax(probs_chirho))
    label_chirho = INTENT_LABELS_CHIRHO[pred_idx_chirho]
    display_chirho = INTENT_DISPLAY_CHIRHO[label_chirho]
    confidence_chirho = float(probs_chirho[pred_idx_chirho])

    intent_result_chirho = f"## Intent: {display_chirho} ({confidence_chirho:.1%})\n"
    if label_chirho == "evangelism_dialogue":
        intent_result_chirho += "Routing: Question sent directly to Generator for conversational response."
    else:
        intent_result_chirho += "Routing: Retriever fetches context passages, then Generator produces Scripture-grounded answer."

    # ── Stage 2: Retrieve Passages ──
    retrieved_list_chirho = []
    if corpus_embeddings_chirho is not None:
        query_emb_chirho = retriever_chirho.encode(
            [question_chirho], normalize_embeddings=True, convert_to_numpy=True
        )[0]
        similarities_chirho = np.dot(corpus_embeddings_chirho, query_emb_chirho)
        # argsort is ascending; reverse and cut to the requested count.
        top_indices_chirho = np.argsort(similarities_chirho)[::-1][:int(k_chirho)]
        lines_chirho = ["### Retrieved Context Passages\n"]
        for rank_chirho, idx_chirho in enumerate(top_indices_chirho, 1):
            passage_chirho = corpus_passages_chirho[idx_chirho]
            sim_chirho = float(similarities_chirho[idx_chirho])
            text_preview_chirho = passage_chirho["text_chirho"][:200]
            lines_chirho.append(
                f"**[{rank_chirho}]** (score: {sim_chirho:.3f}) {text_preview_chirho}..."
            )
            retrieved_list_chirho.append(passage_chirho)
        retrieval_result_chirho = "\n\n".join(lines_chirho)
    else:
        retrieval_result_chirho = "Corpus not available for retrieval."

    # ── Stage 3: Generate Response ──
    generated_text_chirho = generate_response_chirho(
        question_chirho, label_chirho, retrieved_list_chirho
    )
    if generated_text_chirho:
        generated_result_chirho = f"### Generated Response\n\n{generated_text_chirho}"
    else:
        generated_result_chirho = (
            "*Generator not available. In production, the Qwen3-14B + LoRA model "
            "generates a Scripture-grounded response from the retrieved context.*"
        )

    # ── Stage 4: Theological Guardrails ──
    if not generated_text_chirho:
        # Covers both "no generator" (None) and an empty generation ("").
        guardrails_result_chirho = (
            "*Guardrails skipped: no generated text to verify.*"
        )
    elif guardrails_classifier_chirho is None:
        guardrails_result_chirho = (
            "*Guardrails not available. In production, the theological guardrails pipeline "
            "(F1=0.997) verifies the response against orthodox Christian doctrine.*"
        )
    else:
        try:
            result_chirho = check_guardrails_chirho(generated_text_chirho)
        except Exception as e_chirho:
            # Best-effort stage: keep the answer from stages 1-3 and report
            # the failure instead of failing the whole request.
            print(f" Warning: Guardrails check failed: {e_chirho}")
            result_chirho = None

        if result_chirho is None:
            guardrails_result_chirho = "*Guardrails check failed.*"
        else:
            label_map_chirho = {
                "orthodox": "Orthodox",
                "heterodox": "Heterodox",
                "uncertain": "Uncertain",
                "unknown": "Unknown",
            }
            verdict_chirho = label_map_chirho.get(
                result_chirho.overall_label_chirho, result_chirho.overall_label_chirho
            )
            lines_chirho = [
                f"### Guardrails Verdict: **{verdict_chirho}** ({result_chirho.confidence_chirho:.1%})",
                f"**Orthodox similarity:** {result_chirho.embedding_similarity_chirho:.3f}",
            ]
            if result_chirho.top_heresies_chirho:
                # "semi_pelagianism_chirho" -> "Semi Pelagianism"
                heresies_display_chirho = ", ".join(
                    h_chirho.replace("_chirho", "").replace("_", " ").title()
                    for h_chirho in result_chirho.top_heresies_chirho
                )
                lines_chirho.append(f"**Detected heresies:** {heresies_display_chirho}")
            if result_chirho.explanation_chirho:
                lines_chirho.append(f"\n**Explanation:** {result_chirho.explanation_chirho}")
            guardrails_result_chirho = "\n\n".join(lines_chirho)

    return intent_result_chirho, retrieval_result_chirho, generated_result_chirho, guardrails_result_chirho
# ─── Build Gradio Interface ───
def build_demo_chirho() -> gr.Blocks:
    """Build the Gradio demo.

    Creates a Blocks app with four tabs:

    * "Ask a Question" - runs the full pipeline.
    * "Classify Intent" - runs only the intent classifier.
    * "Search Corpus"   - runs only the retriever.
    * "About"           - static description of the system.

    Returns the Blocks object; launching it is left to the caller.
    """
    with gr.Blocks(
        title="Evangelism & Apologetics Pipeline - LoveJesus/models-chirho",
        theme=gr.themes.Soft(),
    ) as demo_chirho:
        gr.Markdown("# Evangelism & Apologetics Pipeline")
        gr.Markdown(
            "*For God so loved the world that he gave his only begotten Son, "
            "that whoever believes in him should not perish but have eternal life. - John 3:16*"
        )
        gr.Markdown(
            "A **4-stage AI pipeline** for answering apologetics questions grounded in Scripture. "
            "Intent classification, passage retrieval, response generation (Qwen3-14B + LoRA), "
            "and theological guardrails verification (F1=0.997). "
            "Part of [bible.systems](https://bible.systems) - Model 9."
        )

        with gr.Tab("Ask a Question"):
            gr.Markdown(
                "Enter any question about Christianity, apologetics, creation, "
                "historical evidence, or miracles. The full 4-stage pipeline classifies your intent, "
                "retrieves relevant passages, generates a response, and verifies it against "
                "orthodox Christian doctrine."
            )
            question_input_chirho = gr.Textbox(
                label="Your Question",
                placeholder="What evidence is there for the resurrection of Jesus?",
                lines=2,
            )
            k_slider_chirho = gr.Slider(3, 10, value=5, step=1, label="Number of context passages")
            ask_btn_chirho = gr.Button("Ask", variant="primary")
            # One output component per pipeline stage, in the order the
            # pipeline function returns them.
            intent_output_chirho = gr.Markdown(label="Stage 1: Intent Classification")
            retrieval_output_chirho = gr.Markdown(label="Stage 2: Retrieved Passages")
            generated_output_chirho = gr.Markdown(label="Stage 3: Generated Response")
            guardrails_output_chirho = gr.Markdown(label="Stage 4: Guardrails Verdict")
            ask_btn_chirho.click(
                full_pipeline_tab_chirho,
                inputs=[question_input_chirho, k_slider_chirho],
                outputs=[
                    intent_output_chirho,
                    retrieval_output_chirho,
                    generated_output_chirho,
                    guardrails_output_chirho,
                ],
            )
            gr.Examples(
                examples=[
                    ["What evidence is there for the resurrection of Jesus?"],
                    ["How do you explain the existence of evil if God is good?"],
                    ["What does the fossil record really show about evolution?"],
                    ["Are there any documented modern miracles?"],
                    ["How can I share the Gospel with someone who doesn't believe in God?"],
                    ["What non-biblical sources confirm that Jesus existed?"],
                    ["How do we know the Bible hasn't been changed over time?"],
                    ["What is the cosmological argument for God's existence?"],
                ],
                inputs=[question_input_chirho],
            )

        with gr.Tab("Classify Intent"):
            gr.Markdown(
                "Test the **intent classifier** (RoBERTa-base, macro F1=0.83). "
                "It routes questions to the appropriate pipeline stage."
            )
            cls_input_chirho = gr.Textbox(
                label="Question",
                placeholder="Enter a question...",
                lines=2,
            )
            cls_btn_chirho = gr.Button("Classify")
            cls_result_chirho = gr.Markdown(label="Result")
            cls_scores_chirho = gr.Markdown(label="All Scores")
            cls_btn_chirho.click(
                classify_intent_tab_chirho,
                inputs=[cls_input_chirho],
                outputs=[cls_result_chirho, cls_scores_chirho],
            )
            gr.Examples(
                examples=[
                    ["What evidence is there for the resurrection?"],
                    ["How old is the earth according to science?"],
                    ["Can you tell me about a modern miracle?"],
                    ["How would you share the Gospel with an atheist?"],
                    ["What did Josephus write about Jesus?"],
                ],
                inputs=[cls_input_chirho],
            )

        with gr.Tab("Search Corpus"):
            gr.Markdown(
                "Search the apologetics corpus using the **retriever** "
                "(MiniLM-L12, Pearson=0.90). Over 13,000 passages from "
                "GotQuestions, Spurgeon sermons, church fathers, creation science, "
                "historical evidence, and miracle testimonies."
            )
            search_input_chirho = gr.Textbox(
                label="Search Query",
                placeholder="Enter a topic or question...",
                lines=2,
            )
            search_k_chirho = gr.Slider(3, 15, value=5, step=1, label="Number of results")
            search_btn_chirho = gr.Button("Search")
            search_output_chirho = gr.Markdown()
            search_btn_chirho.click(
                retrieve_passages_tab_chirho,
                inputs=[search_input_chirho, search_k_chirho],
                outputs=[search_output_chirho],
            )
            gr.Examples(
                examples=[
                    ["soft tissue dinosaur bones young earth"],
                    ["manuscript reliability Bible"],
                    ["fine tuning universe intelligent design"],
                    ["George Muller answered prayer"],
                    ["early church fathers apologetics"],
                ],
                inputs=[search_input_chirho],
            )

        with gr.Tab("About"):
            # The blank line after the VRAM table is required: without it the
            # "Runs on ZeroGPU" sentence is parsed as one more table row.
            gr.Markdown("""# Model 9: Evangelism & Apologetics Pipeline
## What This Does
This AI system answers questions about Christianity, apologetics, creation science,
historical evidence, and miracles - all grounded in Scripture and verified against
orthodox Christian doctrine.
## Four-Stage Architecture
```
User Question -> [Intent Classifier (RoBERTa-base)]
|-> evangelism_dialogue -> [Generator directly]
|-> apologetics_qa -> [Retriever] -> [Generator]
|-> creation_science -> [Retriever] -> [Generator]
|-> historical_evidence -> [Retriever] -> [Generator]
|-> miracle_testimony -> [Retriever] -> [Generator]
|
[Qwen3-14B + LoRA Generator (4-bit)]
|
[Theological Guardrails (F1=0.997)]
|
Final Response with Scripture references
```
| Component | Model | Metric |
| --- | --- | --- |
| **Intent Classifier** | RoBERTa-base (125M) | Macro F1 = 0.83, Weighted F1 = 0.98 |
| **Retriever** | MiniLM-L12-v2 (33M) | Pearson = 0.90, Spearman = 0.86 |
| **Generator** | Qwen3-14B + LoRA (4-bit, 64M trainable) | Perplexity = 4.08 |
| **Guardrails Classifier** | RoBERTa-large (355M) | F1 = 0.997 |
| **Guardrails Embedder** | MiniLM-L12-v2 (33M) | Pearson = 0.970 |
| **Guardrails Explainer** | Flan-T5-base (248M) | loss = 0.0567 |
## VRAM Budget (~10 GB with 4-bit quantization)
| Component | Size |
| --- | --- |
| Intent Classifier (RoBERTa-base) | ~500 MB |
| Retriever (MiniLM-L12) | ~128 MB |
| Generator (Qwen3-14B + LoRA, 4-bit) | ~7 GB |
| Guardrails Classifier (RoBERTa-large) | ~1.3 GB |
| Guardrails Embedder (MiniLM-L12) | ~127 MB |
| Guardrails Explainer (Flan-T5-base) | ~944 MB |
| **Total** | **~10 GB** |

Runs on ZeroGPU (free H200 allocation) via `@spaces.GPU` decorator.
## Training Corpus (13,278 source passages)
| Source | Count | Description |
| --- | --- | --- |
| GotQuestions.org | 9,523 | Apologetics Q&A pairs |
| Spurgeon Sermons | 3,464 | 63 volumes of sermon excerpts |
| Apologetics Articles | 85 | Structured apologetics arguments |
| Evidence Expanded | 55 | Creation science + historical evidence |
| Early Church Fathers | 50 | Justin Martyr, Irenaeus, Tertullian, Origen, Athanasius, Augustine |
| Miracle Testimonies | 31 | Documented modern miracles |
| Evangelism Dialogues | 50 | Seeker-evangelist conversation pairs |
| Creation Science | 12 | Detailed scientific evidence entries |
| Historical Evidence | 5 | Non-biblical sources for Jesus |
## Related Models
- [LoveJesus/evangelism-intent-classifier-chirho](https://huggingface.co/LoveJesus/evangelism-intent-classifier-chirho)
- [LoveJesus/evangelism-retriever-chirho](https://huggingface.co/LoveJesus/evangelism-retriever-chirho)
- [LoveJesus/evangelism-generator-chirho](https://huggingface.co/LoveJesus/evangelism-generator-chirho)
- [LoveJesus/evangelism-dataset-chirho](https://huggingface.co/datasets/LoveJesus/evangelism-dataset-chirho)
- [LoveJesus/theologian-classifier-chirho](https://huggingface.co/LoveJesus/theologian-classifier-chirho)
- [LoveJesus/theologian-embedder-chirho](https://huggingface.co/LoveJesus/theologian-embedder-chirho)
- [LoveJesus/theologian-explainer-chirho](https://huggingface.co/LoveJesus/theologian-explainer-chirho)
---
Built with love for Jesus. Part of [bible.systems](https://bible.systems).
Published by [LoveJesus](https://huggingface.co/LoveJesus).
""")

    return demo_chirho
# Startup sequence, executed when this module runs.
# 1. Populate the module-level model holders before any request is served.
load_models_chirho()

# 2. Build the interface; the Blocks object stays bound at module level.
demo_chirho = build_demo_chirho()

# 3. Start serving.
demo_chirho.launch()