# app.py - Intrinsic Intelligence Foundations: Search & Viewer (Gradio)
import json
import os
import re

import gradio as gr

HF_REPO = os.environ.get("HF_DATASET_REPO", "kadubon/intrinsic-intelligence-foundations")

# Author string shown when a record carries no usable author list.
_DEFAULT_AUTHOR = "K. Takahashi"


def _load_dataset():
    """Load the dataset, trying the Hugging Face Hub first, then a local JSONL file.

    Returns:
        A ``(records, mode)`` pair. ``mode`` is ``"remote"`` when
        ``datasets.load_dataset(HF_REPO, split="train")`` succeeded, or
        ``"local"`` when the records were parsed (one JSON object per
        non-blank line) from the file named by the ``LOCAL_JSONL``
        environment variable.

    Raises:
        RuntimeError: the remote load failed and the local file does not exist.
    """
    try:
        from datasets import load_dataset

        return load_dataset(HF_REPO, split="train"), "remote"
    except Exception as e:
        # Deliberate best-effort: any failure of the remote path (missing
        # package, network error, ...) falls back to the local file.
        local_path = os.environ.get("LOCAL_JSONL", "huggingface_dataset_takahashi.jsonl")
        if os.path.exists(local_path):
            with open(local_path, "r", encoding="utf-8") as f:
                rows = [json.loads(line) for line in f if line.strip()]
            return rows, "local"
        raise RuntimeError(f"Failed to load dataset: {e}") from e


DS, MODE = _load_dataset()


def _iter_records():
    """Yield every record of the loaded dataset, whatever the load mode."""
    yield from DS


EQ_PATTERN = re.compile(r"\[\[EQ:([^\]]+)\]\]")


def expand_placeholders(text, equations, to="tex"):
    """Replace ``[[EQ:<id>]]`` placeholders in *text* with equation markup.

    Args:
        text: Source text; ``None`` is treated as an empty string.
        equations: Iterable of dicts with ``id``, ``tex`` and ``mathml`` keys;
            ``None`` is treated as empty and non-dict entries are skipped.
        to: ``"tex"`` wraps each equation's TeX in ``$$...$$``; any other
            value inserts the equation's MathML string as-is.

    Returns:
        The text with every placeholder substituted. A placeholder whose id is
        unknown, or whose markup is missing or null, is replaced by ``""``.
    """
    use_tex = to == "tex"
    field = "tex" if use_tex else "mathml"
    rendered = {}
    for eq in equations or []:
        if not isinstance(eq, dict):
            continue
        value = eq.get(field) or ""
        # Skip the delimiters for empty TeX so no bare "$$$$" is emitted.
        rendered[eq.get("id")] = f"$${value}$$" if (use_tex and value) else value
    return EQ_PATTERN.sub(lambda m: rendered.get(m.group(1), ""), text or "")


def _plain_text(rec):
    """Return the record's ``fulltext["plain"]`` value, or ``""`` if absent or null."""
    fulltext = rec.get("fulltext")
    if not isinstance(fulltext, dict):
        return ""
    return fulltext.get("plain") or ""


def _keyword_list(rec):
    """Return the record's keywords as a list of strings (empty if unusable)."""
    kws = rec.get("keywords") or []
    if isinstance(kws, str):
        return [kws]
    if not isinstance(kws, (list, tuple)):
        return []
    return [str(k) for k in kws if k is not None]


def _format_authors(authors):
    """Join an author list into ``"Given Family, ..."``, or return the default author."""
    if not isinstance(authors, list):
        return _DEFAULT_AUTHOR
    names = []
    for a in authors:
        if isinstance(a, dict):
            given = str(a.get("given") or "").strip()
            family = str(a.get("family") or "").strip()
            name = f"{given} {family}".strip()
        else:
            name = "" if a is None else str(a).strip()
        if name:
            names.append(name)
    return ", ".join(names) or _DEFAULT_AUTHOR


def record_to_md(rec, show="tex", preview_chars=1200):
    """Render one dataset record as a Markdown string.

    Args:
        rec: Mapping read via the keys ``title``, ``doi``, ``urls``,
            ``authors``, ``keywords``, ``equations`` and ``fulltext``.
            Missing or null fields are tolerated.
        show: ``"tex"`` expands equation placeholders to TeX; any other value
            expands them to MathML.
        preview_chars: Maximum number of body characters kept; a longer body
            is cut and suffixed with ``"…"``.

    Returns:
        A heading with the title, an authors line, optional link and keyword
        lines, then the (possibly truncated) body.
    """
    title = rec.get("title") or "(no title)"
    doi = rec.get("doi")
    urls = rec.get("urls")
    landing = urls.get("landing") if isinstance(urls, dict) else None
    url = landing or (f"https://doi.org/{doi}" if doi else None)
    auth = _format_authors(rec.get("authors"))
    kws = _keyword_list(rec)

    as_tex = show == "tex"
    body = expand_placeholders(
        _plain_text(rec), rec.get("equations"), to="tex" if as_tex else "mathml"
    )
    # NOTE: the cut is by character count, so it can land inside an equation.
    snippet = body[:preview_chars] + ("…" if len(body) > preview_chars else "")
    md_body = snippet if as_tex else f"\n{snippet}\n"

    meta = []
    if url:
        meta.append(f"[DOI]({url})")
    if kws:
        meta.append("keywords: " + ", ".join(kws[:10]))

    # Two trailing spaces before "\n" are a Markdown hard line break, so the
    # authors line and each metadata entry render on their own line.
    header = f"### 📄 {title}\n\n**Authors:** {auth}  \n" + "  \n".join(meta)
    return header + "\n\n" + md_body


def search_dataset(query, show="tex", top_k=5):
    """Return Markdown for the first *top_k* records containing *query*.

    The match is a case-insensitive substring test of the whole query against
    each record's title, keywords and plain full text joined by spaces.

    Args:
        query: Search string; blank or ``None`` yields a usage hint.
        show: Passed through to ``record_to_md``.
        top_k: Maximum number of records to render. Coerced to ``int`` and
            clamped to at least 1; an unparsable value falls back to 5.

    Returns:
        The rendered records separated by horizontal rules, or a short
        message when the query is blank or nothing matches.
    """
    q = (query or "").strip().lower()
    if not q:
        return "Type keywords to search titles/keywords/text."

    try:
        limit = max(1, int(top_k))
    except (TypeError, ValueError, OverflowError):
        limit = 5

    hits = []
    for rec in _iter_records():
        hay = " ".join(
            [str(rec.get("title") or ""), " ".join(_keyword_list(rec)), _plain_text(rec)]
        ).lower()
        if q in hay:
            hits.append(rec)
            # Only `limit` records are rendered, so stop scanning once we have them.
            if len(hits) >= limit:
                break

    if not hits:
        return "No results found."
    return "\n\n---\n\n".join(record_to_md(rec, show=show) for rec in hits)


def view_by_index(idx, show="tex"):
    """Return Markdown for the record at 0-based position *idx*.

    Args:
        idx: Anything ``int()`` accepts; other values yield an error message.
        show: Passed through to ``record_to_md``.

    Returns:
        The rendered record (body preview of up to 10000 characters), or a
        message when *idx* is not an integer, is out of range, or the dataset
        is empty.
    """
    try:
        idx = int(idx)
    except (TypeError, ValueError, OverflowError):
        return "Index must be an integer (0-based)."

    total = len(DS)
    if total == 0:
        return "Dataset is empty."
    if not 0 <= idx < total:
        return f"Out of range. Available: 0..{total - 1}"
    return record_to_md(DS[idx], show=show, preview_chars=10000)


with gr.Blocks(title="Intrinsic Intelligence Foundations") as demo:
    gr.Markdown(
        f"""
# Intrinsic Intelligence Foundations — Search & Viewer
Explore the math-aware dataset (TeX/MathML) for autonomous, self-organizing intelligence research.

**Source:** [Hugging Face dataset](https://huggingface.co/datasets/{HF_REPO})
"""
    )
    with gr.Row():
        query = gr.Textbox(
            label="Search keywords",
            placeholder="e.g., teleogenesis, fractal category theory, UGV",
        )
        show = gr.Radio(choices=["tex", "mathml"], value="tex", label="Render equations as")
        topk = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Top-K")
    btn = gr.Button("Search")
    out = gr.Markdown()
    btn.click(fn=search_dataset, inputs=[query, show, topk], outputs=out)

    gr.Markdown("### Or view by 0-based index")
    with gr.Row():
        idx = gr.Number(value=0, precision=0, label="Index")
        show2 = gr.Radio(
            choices=["tex", "mathml"],
            value="tex",
            label="Render equations as",
            interactive=True,
        )
    btn2 = gr.Button("View record")
    out2 = gr.Markdown()
    btn2.click(fn=view_by_index, inputs=[idx, show2], outputs=out2)

if __name__ == "__main__":
    demo.launch()